In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import warnings 
 
warnings.simplefilter(action='ignore')

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found beneath the Kaggle input directory.
input_file_paths = (
    os.path.join(folder, file_name)
    for folder, _, file_names in os.walk('/kaggle/input')
    for file_name in file_names
)
for input_path in input_file_paths:
    print(input_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import matplotlib.pyplot as plt 
import seaborn as sns
import plotly.express as px
from plotly.offline import init_notebook_mode, iplot
init_notebook_mode(connected=True)

#sklearn imports
from sklearn.model_selection import train_test_split # split the dataset
from sklearn.preprocessing import Normalizer, StandardScaler, MinMaxScaler # Scalers 

from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR 

from sklearn.metrics import mean_absolute_error, mean_squared_error, accuracy_score

In [None]:
# Show up to 30 columns so wide previews of the frame are not truncated.
pd.set_option('display.max_columns', 30)

# Load the raw dataset from the Kaggle input directory.
df = pd.read_csv(r'../input/temperature-forecast-project-using-ml/temp.csv')

# Keep the shape as the final expression of the cell: a notebook only echoes
# the last expression, so in its previous mid-cell position it was never shown.
df.shape

In [None]:
data = df.copy()

In [None]:
data.info()

In [None]:
data.isna().sum()

In [None]:
data['Date'][0].split('-')

In [None]:
# Break every 'Date' entry into its three '-'-separated parts and collect them
# in separate lists for visualization and processing purposes. The first part
# is stored as the day, the second as the month and the third as the year.
list_of_year, list_of_month, list_of_day = [], [], []

for raw_date in data['Date']:
    try:
        pieces = raw_date.split('-')
        year_part, month_part, day_part = pieces[2], pieces[1], pieces[0]
    except AttributeError:
        # Entries that have no .split method (e.g. a missing date held as a
        # float NaN) are recorded as NaN in all three lists.
        year_part = month_part = day_part = np.nan

    list_of_year.append(year_part)
    list_of_month.append(month_part)
    list_of_day.append(day_part)

In [None]:
# Attach the parsed date parts to the frame as three new columns.
for column_name, column_values in (
    ('year', list_of_year),
    ('month', list_of_month),
    ('day', list_of_day),
):
    data[column_name] = column_values

In [None]:
# Convert the date-part columns to a numeric dtype.
for date_part in ('year', 'month', 'day'):
    data[date_part] = pd.to_numeric(data[date_part])

In [None]:
data.drop('Date', axis=1, inplace=True)

In [None]:
data.head()

## EDA

In [None]:
def plot_line(para):
    """Draw a line chart of mean 'Present_Tmax' and 'Present_Tmin' per ``para`` value.

    The module-level ``data`` frame is grouped by the column named ``para``,
    every remaining column is averaged within each group, and the two
    present-temperature columns are plotted against the group key.

    Parameters
    ----------
    para : str
        Name of the column in ``data`` to group by (used as the x axis).

    Returns
    -------
    The result of ``fig.show()``; the figure is rendered as a side effect.
    """
    grouped_means = data.groupby(by=para).mean().reset_index()

    figure = px.line(
        grouped_means,
        x=grouped_means[para],
        y=['Present_Tmax', 'Present_Tmin'],
    )
    figure.update_layout(template='plotly_dark')
    return figure.show()

In [None]:
plot_line('day')

In [None]:
data.columns

## Data Preprocessing

In [None]:
data.head()

In [None]:
data.drop('station', axis=1, inplace=True)

In [None]:
def treat_nan(df):
    """Fill missing values in every numeric column of ``df`` with that column's mean.

    The frame is modified in place and nothing is returned. Non-numeric
    columns are left untouched, because a mean is not defined for them.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose numeric columns should be mean-imputed.
    """
    for column in df.select_dtypes(include='number').columns:
        # Assign the filled column back to the frame. Calling
        # ``df[column].fillna(..., inplace=True)`` operates on an intermediate
        # object; it is deprecated and does not update ``df`` under pandas
        # Copy-on-Write.
        df[column] = df[column].fillna(df[column].mean())

In [None]:
treat_nan(data)

# Splitting the data into feature and target sets

In [None]:
data.head()

In [None]:
# Feature matrix: every column except the two target columns.
target_columns = ['Next_Tmax', 'Next_Tmin']
X = data.drop(columns=target_columns)

# One target series each for the 'Next_Tmax' and 'Next_Tmin' columns.
y1, y2 = data['Next_Tmax'], data['Next_Tmin']

In [None]:
X.head()

In [None]:
def normalizer(x_train, x_test):
    """Scale the train and test features with scikit-learn's ``Normalizer``.

    The scaler is fitted once on the training features and then only applied
    to the test features, the same fit-on-train / transform-on-test pattern
    every preprocessing step should follow. Per the scikit-learn
    documentation ``Normalizer`` rescales each sample independently, so the
    numeric result does not depend on which set it was fitted on.

    Parameters
    ----------
    x_train, x_test : array-like of shape (n_samples, n_features)
        Training and test feature matrices.

    Returns
    -------
    tuple
        ``(x_train_scaled, x_test_scaled)``.
    """
    scaler = Normalizer()
    x_train = scaler.fit_transform(x_train)
    x_test = scaler.transform(x_test)
    return x_train, x_test

In [None]:
def minmax(x_train, x_test):
    """Scale the train and test features with scikit-learn's ``MinMaxScaler``.

    The per-feature minimum and maximum are learned from the training
    features only and then applied unchanged to the test features. Fitting a
    second time on the test set would leak test-set statistics into the
    evaluation and put the two sets on different scales.

    Because the range comes from the training data, scaled test values may
    fall outside the scaler's target interval.

    Parameters
    ----------
    x_train, x_test : array-like of shape (n_samples, n_features)
        Training and test feature matrices.

    Returns
    -------
    tuple
        ``(x_train_scaled, x_test_scaled)``.
    """
    scaler = MinMaxScaler()
    x_train = scaler.fit_transform(x_train)
    # Re-use the training min/max; do NOT refit on the test set.
    x_test = scaler.transform(x_test)
    return x_train, x_test

In [None]:
def stdscaler(x_train, x_test):
    """Scale the train and test features with scikit-learn's ``StandardScaler``.

    The scaling statistics are learned from the training features only and
    then applied unchanged to the test features. Fitting a second time on the
    test set would leak test-set statistics into the evaluation and scale the
    two sets inconsistently.

    Parameters
    ----------
    x_train, x_test : array-like of shape (n_samples, n_features)
        Training and test feature matrices.

    Returns
    -------
    tuple
        ``(x_train_scaled, x_test_scaled)``.
    """
    scaler = StandardScaler()
    x_train = scaler.fit_transform(x_train)
    # Re-use the training statistics; do NOT refit on the test set.
    x_test = scaler.transform(x_test)
    return x_train, x_test

In [None]:
def best_model(X, y, scaler, algo, random_state=42):
    """Train one regressor on a 75/25 split, report its errors and plot the fit.

    Parameters
    ----------
    X : array-like or pandas.DataFrame
        Feature matrix.
    y : array-like or pandas.Series
        Target values aligned with ``X``.
    scaler : callable
        Function taking ``(x_train, x_test)`` arrays and returning the scaled
        pair, e.g. ``normalizer``, ``minmax`` or ``stdscaler``.
    algo : callable
        Estimator class (or zero-argument factory) exposing ``fit`` and
        ``predict``.
    random_state : int, default 42
        Seed for the train/test split and, for estimators whose constructor
        accepts ``random_state``, for the estimator itself, so that repeated
        runs give the same numbers.

    Returns
    -------
    tuple
        ``(predictions, y_test, mse, mae, shown)`` where ``shown`` is whatever
        ``fig.show()`` returns; the scatter plot is rendered as a side effect.
    """
    import inspect

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.25, random_state=random_state, shuffle=True
    )

    # The scaler helpers operate on plain arrays rather than DataFrames.
    X_train, X_test = scaler(np.asarray(X_train), np.asarray(X_test))

    # Seed estimators that support it; others are built with their defaults.
    try:
        accepts_seed = 'random_state' in inspect.signature(algo).parameters
    except (TypeError, ValueError):
        accepts_seed = False
    model = algo(random_state=random_state) if accepts_seed else algo()

    model.fit(X_train, y_train)
    predictions = model.predict(X_test)

    # scikit-learn metrics take (y_true, y_pred); compute each metric once.
    mae = mean_absolute_error(y_test, predictions)
    mse = mean_squared_error(y_test, predictions)

    print(f'The MAE is {mae}')
    print(f'The MSE is {mse}')

    # Title kept generic to match the axis labels (the old text referred to
    # "Productivity", which is not what this function plots).
    fig = px.scatter(
        x=predictions,
        y=y_test,
        template='plotly_dark',
        title='Actual Values vs Predictions',
    )
    fig.update_traces(hovertemplate='Predicted Value : %{x} <br> Actual Value: %{y}')
    fig.update_layout(hoverlabel=dict(
        font_size=20,
        bgcolor='white',
        font_family='Helvetica'
    ))
    fig.update_xaxes(title='Predicted Values', showgrid=False)
    fig.update_yaxes(title='Actual Values', showgrid=False)

    return predictions, y_test, mse, mae, fig.show()

In [None]:
# Both target series, kept together for reference.
list_of_target = [y1, y2]

# Regressor classes to compare; best_model instantiates each one itself.
list_of_algos = [
    LinearRegression,
    KNeighborsRegressor,
    DecisionTreeRegressor,
    RandomForestRegressor,
    SVR,
]

# Errors for target y1 with the `normalizer` scaling, in list_of_algos order.
list_of_MSE, list_of_MAE = [], []

for algo_cls in list_of_algos:
    print(f"{algo_cls}")
    pred, y_test, mse, mae, plot = best_model(X, y1, normalizer, algo_cls)
    list_of_MSE += [mse]
    list_of_MAE += [mae]

In [None]:
# Results table for target y1: one row per algorithm.
# The algorithm labels are derived from list_of_algos itself, so they cannot
# drift out of step with the order in which the models were evaluated.
msemae_y1 = pd.DataFrame({
    'Algos': [algo_cls.__name__ for algo_cls in list_of_algos],
    'MSE_normalizer': list_of_MSE,
    'MAE_normalizer': list_of_MAE,
})

In [None]:
msemae_y1.head()

In [None]:
# Errors for target y1 with the `stdscaler` scaling, in list_of_algos order.
list_of_MSE1, list_of_MAE1 = [], []

for algo_cls in list_of_algos:
    print(f"{algo_cls}")
    pred, y_test, mse, mae, plot = best_model(X, y1, stdscaler, algo_cls)
    list_of_MSE1 += [mse]
    list_of_MAE1 += [mae]

In [None]:
msemae_y1['MSE_stdscaler'] = list_of_MSE1
msemae_y1['MAE_stdscaler'] = list_of_MAE1 

In [None]:
# Errors for target y1 with the `minmax` scaling, in list_of_algos order.
list_of_MSE2, list_of_MAE2 = [], []

for algo_cls in list_of_algos:
    print(f"{algo_cls}")
    pred, y_test, mse, mae, plot = best_model(X, y1, minmax, algo_cls)
    list_of_MSE2 += [mse]
    list_of_MAE2 += [mae]

In [None]:
msemae_y1['MSE_minmax'] = list_of_MSE2
msemae_y1['MAE_minmax'] = list_of_MAE2 

In [None]:
msemae_y1.head()

In [None]:
# Regressor classes to compare; best_model instantiates each one itself.
list_of_algos = [
    LinearRegression,
    KNeighborsRegressor,
    DecisionTreeRegressor,
    RandomForestRegressor,
    SVR,
]

# Errors for target y2 with the `normalizer` scaling, in list_of_algos order.
list_of_MSE3, list_of_MAE3 = [], []

for algo_cls in list_of_algos:
    print(f"{algo_cls}")
    pred, y_test, mse, mae, plot = best_model(X, y2, normalizer, algo_cls)
    list_of_MSE3 += [mse]
    list_of_MAE3 += [mae]

In [None]:
# Results table for target y2: one row per algorithm.
# The algorithm labels are derived from list_of_algos itself, so they cannot
# drift out of step with the order in which the models were evaluated.
msemae_y2 = pd.DataFrame({
    'Algos': [algo_cls.__name__ for algo_cls in list_of_algos],
    'MSE_normalizer': list_of_MSE3,
    'MAE_normalizer': list_of_MAE3,
})

In [None]:
# Errors for target y2 with the `stdscaler` scaling, in list_of_algos order.
list_of_MSE4, list_of_MAE4 = [], []

for algo_cls in list_of_algos:
    print(f"{algo_cls}")
    pred, y_test, mse, mae, plot = best_model(X, y2, stdscaler, algo_cls)
    list_of_MSE4 += [mse]
    list_of_MAE4 += [mae]

In [None]:
msemae_y2['MSE_stdscaler'] = list_of_MSE4
msemae_y2['MAE_stdscaler'] = list_of_MAE4

In [None]:
# Errors for target y2 with the `minmax` scaling, in list_of_algos order.
list_of_MSE5, list_of_MAE5 = [], []

for algo_cls in list_of_algos:
    print(f"{algo_cls}")
    pred, y_test, mse, mae, plot = best_model(X, y2, minmax, algo_cls)
    list_of_MSE5 += [mse]
    list_of_MAE5 += [mae]

In [None]:
msemae_y2['MSE_minmax'] = list_of_MSE5
msemae_y2['MAE_minmax'] = list_of_MAE5

In [None]:
msemae_y2.head()

In [None]:
msemae_y1.head()

In [None]:
# Grouped bar chart of every error column in msemae_y2, one group per algorithm.
error_columns = [
    'MSE_normalizer', 'MAE_normalizer',
    'MSE_stdscaler', 'MAE_stdscaler',
    'MSE_minmax', 'MAE_minmax',
]
hover_style = dict(font_size=20, font_family='Arial')

fig = px.bar(msemae_y2, x='Algos', y=error_columns, barmode='group')
fig.update_layout(
    title='Representation of MAE and MSE values of different Algorithms on the Next Min Temperature',
    template='plotly_dark',
    hoverlabel=hover_style,
)
fig.update_traces(hovertemplate='%{x} : %{y}')
fig.show()

In [None]:
# Grouped bar chart of every error column in msemae_y1, one group per algorithm.
error_columns = [
    'MSE_normalizer', 'MAE_normalizer',
    'MSE_stdscaler', 'MAE_stdscaler',
    'MSE_minmax', 'MAE_minmax',
]
hover_style = dict(font_size=20, font_family='Arial')

fig = px.bar(msemae_y1, x='Algos', y=error_columns, barmode='group')
fig.update_layout(
    title='Representation of MAE and MSE values of different Algorithms on the Next Max Temperature',
    template='plotly_dark',
    hoverlabel=hover_style,
)
fig.update_traces(hovertemplate='%{x} : %{y}')
fig.show()