In [1]:
import pandas as pd
import numpy as np
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder 

In [2]:
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px 

Loading Parquet Files

In [3]:
# Load the combined weather feature table; the combined target table is read
# further down in this cell.
# NOTE(review): every path in this cell ends in '.parpuet' rather than
# '.parquet' — presumably the files on disk carry that spelling; confirm
# before renaming anything.
rain_input = pd.read_parquet('weather_inputs.parpuet')
# Per-split input files, currently not loaded:
# train_input = pd.read_parquet('train_inputs.parpuet')
# val_input = pd.read_parquet('val_inputs.parpuet')
# test_input = pd.read_parquet('test_inputs.parpuet')

rain_target = pd.read_parquet('weather_targets.parpuet')
# Per-split target files, currently not loaded:
# train_target = pd.read_parquet('train_targets.parpuet')
# val_target = pd.read_parquet('val_targets.parpuet')
# test_target = pd.read_parquet('test_targets.parpuet')

In [5]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import accuracy_score, confusion_matrix
import joblib

In [6]:
# Load the saved model bundle and display it. As the cell output shows, the
# bundle is a plain dict holding the estimator ('model'), the preprocessing
# objects ('imputer', 'scaler', 'encoder') and the column lists used with them.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code — only load bundles that come from a trusted source.
model2 = joblib.load('Weather_pred.joblib')
model2

{'model': LogisticRegression(solver='liblinear'),
 'imputer': SimpleImputer(),
 'scaler': MinMaxScaler(),
 'encoder': OneHotEncoder(handle_unknown='ignore', sparse=False),
 'input_cols': ['Location',
  'MinTemp',
  'MaxTemp',
  'Rainfall',
  'Evaporation',
  'Sunshine',
  'WindGustDir',
  'WindGustSpeed',
  'WindDir9am',
  'WindDir3pm',
  'WindSpeed9am',
  'WindSpeed3pm',
  'Humidity9am',
  'Humidity3pm',
  'Pressure9am',
  'Pressure3pm',
  'Cloud9am',
  'Cloud3pm',
  'Temp9am',
  'Temp3pm',
  'RainToday'],
 'target_col': 'RainTomorrow',
 'numeric_cols': ['MinTemp',
  'MaxTemp',
  'Rainfall',
  'Evaporation',
  'Sunshine',
  'WindGustSpeed',
  'WindSpeed9am',
  'WindSpeed3pm',
  'Humidity9am',
  'Humidity3pm',
  'Pressure9am',
  'Pressure3pm',
  'Cloud9am',
  'Cloud3pm',
  'Temp9am'],
 'categorical_cols': ['Location',
  'WindGustDir',
  'WindDir9am',
  'WindDir3pm',
  'RainToday'],
 'encoded_cols': ['Location_Adelaide',
  'Location_Albany',
  'Location_Albury',
  'Location_AliceSprings

In [7]:
# Display the coefficient array of the estimator stored under the 'model' key.
model2['model'].coef_

array([[ 9.82945297e-01, -1.61385789e+00,  3.25684752e+00,
         7.39368590e-01, -1.66573086e+00,  6.71277020e+00,
        -8.94606508e-01, -1.47865085e+00,  5.08617728e-01,
         5.66897424e+00,  5.75142698e+00, -9.44242406e+00,
        -1.54232343e-01,  1.26927031e+00,  9.61060276e-01,
         5.96811815e-01, -5.43362082e-01,  4.84103408e-01,
         1.26967082e-02,  3.42102551e-01, -3.50290821e-01,
         1.81448852e-01,  4.25853922e-01, -4.90976887e-03,
         1.54237867e-02,  2.53766995e-01, -1.83977593e-02,
        -3.04697363e-02, -4.67297535e-01, -1.44192180e-01,
        -5.90817375e-01, -7.44585621e-01, -2.49884062e-01,
        -3.28671106e-01, -5.70918989e-01,  8.01673605e-02,
         1.40377925e-02,  5.99543919e-02, -8.77143892e-01,
        -4.41490005e-01,  1.18433531e-02, -4.59483437e-01,
        -4.60186631e-01, -7.46882059e-02,  1.94560074e-01,
         4.45679113e-01,  6.07372133e-01,  4.30391895e-01,
        -2.08914221e-02,  2.53152319e-01, -3.19391861e-0

In [8]:
# --- Column selection -------------------------------------------------------
# Both column lists must describe the INPUT table, because predict_input()
# indexes a raw input row with them.
# The trailing [:-1] drops the last numeric column. The loaded bundle's
# 'numeric_cols' likewise stops at 'Temp9am' and omits 'Temp3pm' — presumably
# that is the column dropped here (TODO confirm against rain_input's order).
numeric_cols = rain_input.select_dtypes(include=np.number).columns.tolist()[:-1]
# Previously taken from rain_target, which does not contain the categorical
# input features (Location, WindGustDir, ...) that a raw input row provides.
categorical_col = rain_input.select_dtypes('object').columns.tolist()

# --- Fit the preprocessing steps on the input table -------------------------
# NOTE(review): model2 also carries 'imputer', 'scaler' and 'encoder' entries;
# if those were fitted when the model was trained, reusing them would keep the
# preprocessing identical to training — confirm and consider switching.
imputer = SimpleImputer(strategy='mean').fit(rain_input[numeric_cols])
scaler = MinMaxScaler().fit(rain_input[numeric_cols])
# The encoder has to be fitted on the same table the categorical columns were
# selected from; it was previously fitted on rain_target.
enc = OneHotEncoder(sparse=False, handle_unknown='ignore').fit(rain_input[categorical_col])
enc_col = list(enc.get_feature_names(categorical_col))



In [10]:
def predict_input(single_input):
    """Predict the target class for a single observation.

    Parameters
    ----------
    single_input : dict
        Mapping of column name to raw value for one row. It must contain
        every column named in the notebook-level ``numeric_cols`` and
        ``categorical_col`` lists; any other keys are carried along but not
        passed to the classifier.

    Returns
    -------
    tuple
        ``(pred, prob)`` — the predicted class label and the probability the
        classifier assigns to that label.
    """
    # model2 is the dict bundle returned by joblib.load, not an estimator, so
    # the classifier must be taken from its 'model' entry before predicting.
    classifier = model2['model']

    input_df = pd.DataFrame([single_input])
    # Fill missing numeric values first, then scale the completed columns.
    input_df[numeric_cols] = imputer.transform(input_df[numeric_cols])
    input_df[numeric_cols] = scaler.transform(input_df[numeric_cols])
    input_df[enc_col] = enc.transform(input_df[categorical_col])
    X_input = input_df[numeric_cols + enc_col]

    pred = classifier.predict(X_input)[0]
    # predict_proba columns follow classifier.classes_, so pick pred's column.
    pred_position = list(classifier.classes_).index(pred)
    prob = classifier.predict_proba(X_input)[0][pred_position]
    return pred, prob

In [11]:
# One hand-written raw observation to feed to predict_input().
# Sunshine is NaN, so the numeric imputation step has a missing value to fill.
new_input = dict(
    Date='2021-06-19',
    Location='Launceston',
    MinTemp=23.2,
    MaxTemp=33.2,
    Rainfall=10.2,
    Evaporation=4.2,
    Sunshine=np.nan,
    WindGustDir='NNW',
    WindGustSpeed=52.0,
    WindDir9am='NW',
    WindDir3pm='NNE',
    WindSpeed9am=13.0,
    WindSpeed3pm=20.0,
    Humidity9am=89.0,
    Humidity3pm=58.0,
    Pressure9am=1004.8,
    Pressure3pm=1001.5,
    Cloud9am=8.0,
    Cloud3pm=5.0,
    Temp9am=25.7,
    Temp3pm=33.0,
    RainToday='Yes',
)

In [12]:
# Run the single-row prediction; the cell displays the (pred, prob) tuple
# that predict_input returns.
predict_input(new_input)