# Linear Regression

In [122]:
#Importing required libs
import math
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.isotonic import IsotonicRegression

from sklearn.metrics import confusion_matrix, classification_report
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
import pickle
%matplotlib inline

In [155]:
# Constants

# Directory holding the workbook; joined with FILE_NAME as "<FILE_PATH>/<FILE_NAME>".
FILE_PATH = "."
# Workbook file name passed to the loader.
FILE_NAME = "corona-19.xlsx"
# Sheet to read from the workbook.
# NOTE(review): the spelling looks like a typo, but this is a runtime string and
# presumably mirrors the sheet name inside the workbook -- confirm before changing.
SHEET_NAME = "COVID-19-geographic-disbtributi"
# Columns kept when the sheet is loaded.
NAMES = ["Cases", "Deaths" , "Countries and territories", "GeoId"]
# Key columns the rows are grouped (and summed) by.
GROUP_BY = ["GeoId", "Countries and territories"]
# File the fitted model is pickled to.
FILE = 'isotonic_regression_corona.sav'

def load_excel_file(names, file_name, file_path, sheet_name = None):
    """Read an Excel workbook and return the requested columns as a DataFrame.

    Parameters
    ----------
    names : list of str
        Column labels handed to the ``DataFrame`` constructor as ``columns``.
    file_name : str
        Workbook file name.
    file_path : str
        Directory containing the workbook; joined as ``"<file_path>/<file_name>"``.
    sheet_name : optional
        Forwarded unchanged to ``pandas.read_excel``.

    Returns
    -------
    pandas.DataFrame or None
        The selected columns of the sheet. When ``read_excel`` returns a
        mapping of sheets instead of a single frame (e.g. ``sheet_name=None``),
        only the first sheet is used; ``None`` is returned if that mapping is
        empty.

    Raises
    ------
    Exception
        Whatever ``pandas.read_excel`` raises is propagated untouched.
    """
    data = pd.read_excel(f"{file_path}/{file_name}", sheet_name=sheet_name)

    if isinstance(data, pd.DataFrame):
        return pd.DataFrame(data, columns=names)

    # Not a single frame: treat it as a {sheet: frame} mapping and keep only
    # the first sheet.
    first_sheet = next(iter(data.values()), None)
    if first_sheet is None:
        return None
    return pd.DataFrame(first_sheet, columns=names)

def clean_null_values(dataframe):
    """Return ``dataframe`` without the rows that contain a null value.

    When no cell is null the very same object is handed back; otherwise the
    result of ``dataframe.dropna()`` is returned.
    """
    has_missing = dataframe.isnull().values.any()
    return dataframe.dropna() if has_missing else dataframe

def group_dataframe(dataframe, group):
    dataframe_grouped = dataframe.groupby(group).sum()
    dataframe_grouped = dataframe_grouped.reset_index()
    return dataframe_grouped

def split_dataframe(dataframe, test_size = 0.2):
    """Split the 'Cases' and 'Deaths' columns of ``dataframe`` into train/test sets.

    'Cases' is the model input (X) and 'Deaths' is the target (y).

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame containing the 'Cases' and 'Deaths' columns.
    test_size : float, default 0.2
        Forwarded to ``train_test_split``.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test)`` as produced by
        ``train_test_split`` with ``random_state=42``.
    """
    # Read from the argument, not from a module-level frame, so the function
    # works on whatever frame the caller passes in.
    cases = dataframe['Cases'].values
    deaths = dataframe['Deaths'].values

    X_train, X_test, y_train, y_test = train_test_split(cases, deaths, test_size=test_size, random_state=42)
    return X_train, X_test, y_train, y_test

def train_model(X_train, y_train):
    """Fit a new ``IsotonicRegression`` (default settings) and return it.

    ``X_train`` and ``y_train`` are passed to ``fit`` unchanged.
    """
    # fit() hands back the estimator itself, so the fitted model can be
    # returned directly.
    return IsotonicRegression().fit(X_train, y_train)

def test_values(X_test, model):
    """Return ``model.predict(X_test)``.

    Parameters
    ----------
    X_test
        Inputs to predict for; passed to ``model.predict`` unchanged.
    model
        Any object exposing a ``predict`` method.

    Returns
    -------
    Whatever ``model.predict`` returns for ``X_test``.
    """
    # Use the caller's X_test; do not replace it with module-level data.
    return model.predict(X_test)


# Despite the "test_" prefix this is a helper, not a test case: keep pytest
# from collecting it when the function is imported into a test module.
test_values.__test__ = False

# Pipeline: load the sheet, drop rows with nulls, sum per country, split, fit.
df = load_excel_file(NAMES, FILE_NAME, FILE_PATH, SHEET_NAME)
df_cleaned = clean_null_values(df)
df_grouped = group_dataframe(df_cleaned, GROUP_BY)

# The split is kept under its original name as well as unpacked, so any code
# that refers to the tuple keeps working.
values_for_train_test = split_dataframe(df_grouped)
X_train, X_test, y_train, y_test = values_for_train_test

model_ir = train_model(X_train, y_train)

# Save the model to disk. The context manager closes (and therefore flushes)
# the file before anything tries to read it back.
with open(FILE, 'wb') as model_file:
    pickle.dump(model_ir, model_file)

In [None]:
### Predict

In [156]:
# Load the model back from disk and predict for a single case count.
# NOTE: pickle.load can execute arbitrary code from the file; only load model
# files that this notebook produced.
number_of_cases = 4900
with open(FILE, 'rb') as model_file:
    loaded_model = pickle.load(model_file)
result = loaded_model.predict([number_of_cases])
# predict() was given one sample, so take that single prediction before
# converting it to a float.
print(round(float(result[0])))

96
