In [26]:
import joblib
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score

### Using the saved model to make predictions on the Rain in Australia dataset

In [3]:
# Load the saved bundle from disk. As the cell outputs below show, it is a dict
# holding the fitted model together with the imputer, scaler, encoder and the
# column-name lists used for preprocessing.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code — only load model files that come from a trusted source.
aussie_rain2 = joblib.load('aussie_rain_model.joblib')

#### Let's use the loaded model to make predictions on a single new input.

In [14]:
# Look up the estimator stored under the bundle's 'model' key.
aussie_rain2['model']

LogisticRegression(solver='liblinear')

In [19]:
# Display the whole bundle to see which preprocessing objects and column lists it contains.
aussie_rain2

{'model': LogisticRegression(solver='liblinear'),
 'imputer': SimpleImputer(),
 'scaler': MinMaxScaler(),
 'encoder': OneHotEncoder(handle_unknown='ignore', sparse=False),
 'input_cols': ['Location',
  'MinTemp',
  'MaxTemp',
  'Rainfall',
  'Evaporation',
  'Sunshine',
  'WindGustDir',
  'WindGustSpeed',
  'WindDir9am',
  'WindDir3pm',
  'WindSpeed9am',
  'WindSpeed3pm',
  'Humidity9am',
  'Humidity3pm',
  'Pressure9am',
  'Pressure3pm',
  'Cloud9am',
  'Cloud3pm',
  'Temp9am',
  'Temp3pm',
  'RainToday'],
 'target_col': 'RainTomorrow',
 'numeric_cols': ['MinTemp',
  'MaxTemp',
  'Rainfall',
  'Evaporation',
  'Sunshine',
  'WindGustSpeed',
  'WindSpeed9am',
  'WindSpeed3pm',
  'Humidity9am',
  'Humidity3pm',
  'Pressure9am',
  'Pressure3pm',
  'Cloud9am',
  'Cloud3pm',
  'Temp9am',
  'Temp3pm'],
 'categorical_cols': ['Location',
  'WindGustDir',
  'WindDir9am',
  'WindDir3pm',
  'RainToday'],
 'encoded_cols': ['Location_Adelaide',
  'Location_Albany',
  'Location_Albury',
  'Location

In [15]:
# One raw observation keyed by the dataset's original field names.
# 'Date' is not among the model's input columns, and the missing 'Sunshine'
# reading (NaN) is filled in by the saved imputer inside predict_input.
new_input = {
    'Date': '2021-06-19',
    'Location': 'Katherine',
    'MinTemp': 23.2,
    'MaxTemp': 33.2,
    'Rainfall': 10.2,
    'Evaporation': 4.2,
    'Sunshine': np.nan,
    'WindGustDir': 'NNW',
    'WindGustSpeed': 52.0,
    'WindDir9am': 'NW',
    'WindDir3pm': 'NNE',
    'WindSpeed9am': 13.0,
    'WindSpeed3pm': 20.0,
    'Humidity9am': 89.0,
    'Humidity3pm': 58.0,
    'Pressure9am': 1004.8,
    'Pressure3pm': 1001.5,
    'Cloud9am': 8.0,
    'Cloud3pm': 5.0,
    'Temp9am': 25.7,
    'Temp3pm': 33.0,
    'RainToday': 'Yes',
}

In [16]:
def predict_input(single_input, artifacts=None):
    """Predict the target for one raw observation using a saved model bundle.

    Parameters
    ----------
    single_input : dict
        Maps column names to raw values. It must contain every column listed
        in the bundle's 'numeric_cols' and 'categorical_cols'; any extra keys
        are ignored.
    artifacts : dict, optional
        Bundle with the keys 'model', 'imputer', 'scaler', 'encoder',
        'numeric_cols', 'categorical_cols' and 'encoded_cols'. Defaults to the
        notebook-level ``aussie_rain2`` bundle.

    Returns
    -------
    tuple
        ``(pred, prob)``: the predicted class label and the probability the
        model assigns to that label.

    Raises
    ------
    KeyError
        If a required column is missing from ``single_input``.
    """
    if artifacts is None:
        artifacts = aussie_rain2
    model = artifacts['model']
    numeric_cols = artifacts['numeric_cols']

    input_df = pd.DataFrame([single_input])
    row_index = input_df.index

    # Numeric features: fill missing values first, then scale. The result is
    # re-wrapped in a DataFrame so the scaler still receives column names.
    imputed = pd.DataFrame(
        artifacts['imputer'].transform(input_df[numeric_cols]),
        columns=numeric_cols,
        index=row_index,
    )
    scaled = pd.DataFrame(
        artifacts['scaler'].transform(imputed),
        columns=numeric_cols,
        index=row_index,
    )

    # Categorical features: one-hot encode into a separate frame. Building it
    # in one go avoids inserting many columns into an existing DataFrame,
    # which fragments it.
    encoded = pd.DataFrame(
        artifacts['encoder'].transform(input_df[artifacts['categorical_cols']]),
        columns=artifacts['encoded_cols'],
        index=row_index,
    )

    # Column order is numeric columns followed by encoded columns.
    X_input = pd.concat([scaled, encoded], axis=1)

    pred = model.predict(X_input)[0]
    # Report the probability of the class that was actually predicted.
    class_position = list(model.classes_).index(pred)
    prob = model.predict_proba(X_input)[0][class_position]
    return pred, prob

In [17]:
# Returns a (predicted label, probability of that label) tuple for the sample defined above.
predict_input(new_input)

('Yes', 0.5189715005971175)

#### Let's use the loaded model to make predictions on the datasets that were saved as Parquet files.

In [20]:
%%time

# Read each split back from Parquet, pairing every inputs frame with the
# target column (named in the bundle) of the same split.
train_inputs = pd.read_parquet('train_inputs.parquet')
train_targets = pd.read_parquet('train_targets.parquet')[aussie_rain2['target_col']]

valid_inputs = pd.read_parquet('valid_inputs.parquet')
valid_targets = pd.read_parquet('valid_targets.parquet')[aussie_rain2['target_col']]

test_inputs = pd.read_parquet('test_inputs.parquet')
test_targets = pd.read_parquet('test_targets.parquet')[aussie_rain2['target_col']]

Wall time: 587 ms


In [21]:
# Verify that the data was loaded properly: report the shape of every split and
# fail loudly if an inputs frame and its targets do not have the same row count.
for split_name, split_inputs, split_targets in [
    ('train', train_inputs, train_targets),
    ('valid', valid_inputs, valid_targets),
    ('test', test_inputs, test_targets),
]:
    print(f'{split_name}_inputs:', split_inputs.shape)
    print(f'{split_name}_targets:', split_targets.shape)
    assert len(split_inputs) == len(split_targets), (
        f'{split_name} inputs and targets have different row counts'
    )

train_inputs: (97988, 123)
train_targets: (97988,)
valid_inputs: (17089, 123)
valid_targets: (17089,)
test_inputs: (25710, 123)
test_targets: (25710,)


In [22]:
# Keep only the model's feature columns (numeric columns followed by the
# one-hot encoded columns) for each split.
X_train, X_val, X_test = (
    split_inputs[aussie_rain2['numeric_cols'] + aussie_rain2['encoded_cols']]
    for split_inputs in (train_inputs, valid_inputs, test_inputs)
)

In [27]:
# Predict labels for the held-out test split with the loaded model ...
test_preds = aussie_rain2['model'].predict(X_test)
# ... and show the fraction of test rows whose prediction matches the true label.
accuracy_score(y_true=test_targets, y_pred=test_preds)

0.8420070011668611

#### END