In [None]:
def launch_fe(data):
    """Run feature engineering on the raw dataset and write the train/test CSVs.

    Steps, in order:
      1. Read the CSV from ``data`` (``skipinitialspace=True``).
      2. In every numeric column that contains +inf / -inf, replace those
         values with that column's largest / smallest finite value.
      3. Fit a ``OneHotEncoder`` on "Location" and "Season", pickle it to a
         file named ``ed139373-2254-4c94-b084-33a55d53bdf4`` in the current
         working directory, and replace the two columns with their one-hot
         columns (appended after the remaining columns).
      4. Move the label column "PolyPwr" to position 0.
      5. Split 80/20 with ``random_state=42`` and write ``train.csv`` and
         ``test.csv`` without header or index.

    Args:
        data: Anything ``pandas.read_csv`` accepts (a path or a file-like
            object).

    Returns:
        None. All results are written to disk.

    Raises:
        KeyError: if "Location", "Season" or "PolyPwr" is not a column of the
            dataset.
    """
    import pickle

    import numpy as np
    import pandas as pd
    from scipy import sparse
    from sklearn.model_selection import train_test_split
    from sklearn.preprocessing import OneHotEncoder

    label_column = "PolyPwr"
    encode_features = ["Location", "Season"]
    model_name = "ed139373-2254-4c94-b084-33a55d53bdf4"

    dataset = pd.read_csv(data, skipinitialspace=True)

    # Replace inf and -inf with the max and min finite values of the
    # particular column. Done column by column so each replacement value is a
    # scalar belonging to that column.
    numeric = dataset.select_dtypes(include=np.number)
    for col in numeric.columns:
        values = numeric[col]
        if not np.isinf(values).any():
            continue
        finite = values[np.isfinite(values)]
        # If the column has no finite value at all, max()/min() are NaN and
        # the infinities become missing values.
        dataset[col] = values.replace(
            {np.inf: finite.max(), -np.inf: finite.min()}
        )

    # One hot encode categorical values.
    # The dense/sparse switch of OneHotEncoder was renamed from `sparse` to
    # `sparse_output` and the old keyword was later removed, so neither
    # spelling works on every release. Leave the default output and densify
    # below, which behaves the same everywhere.
    one_hot_encode_model = \
        OneHotEncoder(handle_unknown='ignore').fit(dataset[encode_features])

    # Save the fitted encoder so the prediction stage can reuse it.
    with open(model_name, "wb") as fh:
        pickle.dump(one_hot_encode_model, fh)

    new_features = one_hot_encode_model.transform(dataset[encode_features])
    new_feature_names = \
        one_hot_encode_model.get_feature_names_out(encode_features)
    if sparse.issparse(new_features):
        new_features = new_features.toarray()
    dataframe = pd.DataFrame(new_features, columns=new_feature_names)
    dataset = dataset.drop(encode_features, axis=1)
    # reset_index so both frames line up row by row when concatenated.
    dataset = pd.concat(
        [dataset.reset_index(drop=True), dataframe.reset_index(drop=True)],
        axis=1,
    )

    # Move the label column to the 0th position (no-op if already there);
    # the training cells read the CSVs positionally with column 0 as target.
    cols = list(dataset.columns)
    label_idx = dataset.columns.get_loc(label_column)
    if label_idx != 0:
        cols.insert(0, cols.pop(label_idx))
        dataset = dataset[cols]

    # Split dataset into train and test.
    train, test = train_test_split(dataset, test_size=0.2, random_state=42)

    # Write train and test csv (no header, no index).
    train.to_csv('train.csv', index=False, header=False)
    test.to_csv('test.csv', index=False, header=False)
def get_model_id():
    """Return the fixed identifier string of the saved feature-engineering object."""
    model_id = "ed139373-2254-4c94-b084-33a55d53bdf4"
    return model_id


In [None]:

# Upload the dataset CSV from your local machine (requires Google Colab)
from io import BytesIO
from google.colab import files
uploaded_file = files.upload()
# uploaded_file maps each uploaded file name to its content. Fail with a clear
# message if it is empty instead of hitting a NameError on `filename` below.
if not uploaded_file:
    raise ValueError("No file was uploaded; re-run this cell and select the dataset CSV.")
# If several files were uploaded, the last one is used.
filename = list(uploaded_file)[-1]
data = BytesIO(uploaded_file[filename])


In [None]:

# Launch FE
# `data` is an in-memory buffer that gets consumed when the CSV is read.
# Rewind it first so this cell can be re-run without re-uploading the file.
if hasattr(data, "seek"):
    data.seek(0)
launch_fe(data)


In [None]:

# Estimator used for this experiment
from sklearn.ensemble import RandomForestRegressor

algorithm = 'RandomForestRegressor'

# Hyperparameters
n_estimators = 10
max_depth = None

# Build the (not yet fitted) regressor with a fixed random_state
model = RandomForestRegressor(
    n_estimators=n_estimators,
    max_depth=max_depth,
    random_state=0,
)


In [None]:

import pandas as pd

# train.csv / test.csv were written without a header row, so the columns are
# addressed by position: column 0 is the target, the rest are the features.
train, test = (
    pd.read_csv(csv_path, skipinitialspace=True, header=None)
    for csv_path in ('train.csv', 'test.csv')
)

# Fit the regressor on the training split
model.fit(train.iloc[:, 1:], train.iloc[:, 0])


In [None]:

import numpy as np
from sklearn.metrics import mean_absolute_error

# Column 0 of the test split holds the true target; the rest are features.
y_true = test.iloc[:, 0]
y_pred = model.predict(test.iloc[:, 1:])

# Root-mean-squared error
residuals = y_pred - y_true
rmse = np.sqrt(np.mean(residuals ** 2))
print('RMSE of the model is: ', rmse)

# Mean absolute error
mae = mean_absolute_error(np.array(y_true), y_pred)
print('MAE of the model is: ', mae)


In [None]:

# fe_transform function transforms raw data into a form the model can consume
print('Below is the prediction stage of the AI')
def fe_transform(data_dict, object_path=None):
    """Apply the saved one-hot encoding to a single raw record.

    Args:
        data_dict: Mapping of column name -> value for one sample. It must
            contain the keys "Location" and "Season".
        object_path: Not used by this function; kept in the signature so
            callers that pass it keep working.

    Returns:
        A one-row DataFrame holding the input columns except "Location" and
        "Season" (in the key order of ``data_dict``), followed by the one-hot
        columns produced by the saved encoder.

    Raises:
        FileNotFoundError: if the pickled encoder file
            ``ed139373-2254-4c94-b084-33a55d53bdf4`` is not in the current
            working directory.
        KeyError: if "Location" or "Season" is missing from ``data_dict``.

    NOTE(review): the result keeps the caller's key order. If the consumer of
    this frame matches columns by position, the keys must arrive in the same
    order as the columns used at training time -- confirm against the caller.
    """
    import pickle

    import pandas as pd
    from scipy import sparse

    encode_features = ["Location", "Season"]
    object_name = "ed139373-2254-4c94-b084-33a55d53bdf4"

    dataset = pd.DataFrame([data_dict])

    # SECURITY: pickle.load can execute arbitrary code. Only load an encoder
    # file that this notebook wrote itself, never one from an untrusted source.
    # The context manager makes sure the handle is closed again.
    with open(object_name, 'rb') as fh:
        one_hot_encode_model = pickle.load(fh)

    new_features = \
        one_hot_encode_model.transform(dataset[encode_features])
    new_feature_names = \
        one_hot_encode_model.get_feature_names_out(encode_features)
    # The encoder may return a scipy sparse matrix; DataFrame needs it dense.
    if sparse.issparse(new_features):
        new_features = new_features.toarray()
    dataframe = pd.DataFrame(new_features, columns=new_feature_names)
    dataset = dataset.drop(encode_features, axis=1)
    # reset_index so both frames line up row by row when concatenated.
    dataset = pd.concat([dataset.reset_index(drop=True), dataframe.reset_index(drop=True)], axis=1)

    return dataset


In [None]:

# One raw record, in the same shape fe_transform expects (one key per column)
test_sample = {
    'Location': 'Travis',
    'Date': 20175763.5,
    'Time': 1272.5,
    'Altitude': 974.0,
    'Latitude': 34.205,
    'Longitude': -118.275,
    'Hour': 13,
    'YRMODAHRMI': 201757500000.0,
    'Month': 6.5,
    'AmbientTemp': 22.878300000000003,
    'Season': 'Summer',
    'Humidity': 49.993895,
    'Visibility': 5.0,
    'Wind.Speed': 24.5,
    'Pressure': 905.6,
    'Cloud.Ceiling': 361.0,
}

# Run the feature-engineering step on the sample
test_sample_modified = fe_transform(test_sample)

# NOTE(review): the model was fit on header-less CSV columns, i.e. by position.
# The key order above therefore has to match the training column order --
# TODO confirm against the columns of the uploaded dataset.
prediction = model.predict(test_sample_modified)
print(prediction)
