In [None]:
from urllib.parse import quote

import joblib
import numpy as np
import pandas as pd

from models.MeanEncoder import MeanEncoder

In [None]:
# Directory prefix applied to every artefact path below.
# Leave empty when running from the project root; set it to
# '/home/natuyuki/ml_webapp/' when deploying on PythonAnywhere.
PATH_PREFIX = ''

# Load the model and the scaler.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only load artefacts from a trusted source.
model = joblib.load(PATH_PREFIX + 'models/gbc_2023_01_to_04.joblib')
scaler = joblib.load(PATH_PREFIX + 'models/scaler.joblib')

# The mean encoder is configured from a JSON file instead of being un-pickled
# (i.e. instead of joblib.load('models/mean_encoder.joblib')), which avoids
# pickling the project's own MeanEncoder class.
mean_encoder = MeanEncoder()
mean_encoder.set_from_json(PATH_PREFIX + 'static/encoding_dict.json')

In [None]:
def _geocode_address(address):
    '''
    Look up an address with the OneMap search API.

    Returns a numpy array [latitude, longitude] built from the first search result.
    Raises IndexError when the search returns no results, and whatever `requests`
    raises for network / HTTP errors.
    '''
    # `requests` is not imported in the notebook's import cell, so it is imported
    # here, next to the only place that performs an HTTP call.
    import requests

    # Percent-encode the search term so spaces, '&' or '#' cannot corrupt the query string.
    search_val = quote(address, safe='')
    call = f'https://developers.onemap.sg/commonapi/search?searchVal={search_val}&returnGeom=Y&getAddrDetails=Y'
    # A timeout keeps the notebook from hanging forever on an unresponsive server.
    response = requests.get(call, timeout=10)
    response.raise_for_status()

    results = response.json()['results']
    if not results:
        raise IndexError(f'No OneMap search results for address {address!r}')
    best_match = results[0]
    return np.array([float(best_match['LATITUDE']), float(best_match['LONGITUDE'])])


def distance_to(df_series: pd.Series, to_address: str, dist_type: str = 'latlong', verbose: int = 0):
    '''
    Compute the distance from every location in a series to one destination.

    ## Parameters
    df_series : pd.Series
        each element is an array-like [latitude, longitude]
    to_address : str or array-like
        place and street name, resolved to coordinates through the OneMap search API;
        an array-like [latitude, longitude] is also accepted and used directly
    dist_type : str
        'geodesic' for the geodesic distance in kilometres (rounded to 2 decimals);
        any other value ('latlong' by default) gives the sum of the absolute
        latitude and longitude differences
    verbose : int
        1 prints the destination coordinates,
        2 prints the intermediate arrays of every 'latlong' computation

    ## Returns
    pd.Series with the distance between each input location and the destination

    ## Raises
    TypeError if df_series has no `.apply` method (i.e. is not Series-like)
    IndexError if the address lookup returns no results
    '''
    # Fail early (before any network call) on inputs that cannot be processed.
    if not hasattr(df_series, 'apply'):
        raise TypeError(
            f'df_series must be a pandas Series of [latitude, longitude] entries, got {type(df_series).__name__}'
        )

    if isinstance(to_address, str):
        # An address is given: resolve it to coordinates.
        to_coordinates = _geocode_address(to_address)
    else:
        # Coordinates are given directly.
        to_coordinates = np.asarray(to_address, dtype=float)

    if verbose == 1:
        print(f'Coordinates of {to_address} : {to_coordinates}')

    def matrix_operations(from_coordinates, coordinates):
        # Element-wise difference between origin and destination
        distance_diff = from_coordinates - coordinates
        absolute_dist = np.absolute(distance_diff)

        # Sum over latitude and longitude of the entry
        sum_of_distances = np.sum(absolute_dist)

        if verbose == 2:
            print(f'Difference in distances: \n{distance_diff}')
            print()
            print(f'Absolute difference: \n{absolute_dist}')
            print()
            print(f'Sum of distances \n {sum_of_distances}')

        return sum_of_distances

    def geodesic_operations(from_coordinates, coordinates):
        # NOTE(review): GD is not imported in the visible cells; presumably
        # `from geopy.distance import geodesic as GD` - confirm and add it to the import cell.
        geodesic_dist = GD(tuple(from_coordinates), tuple(coordinates)).kilometers
        return np.round(geodesic_dist, 2)

    # Both helpers share the keyword name `coordinates`, matching the keyword passed to apply().
    operation = geodesic_operations if dist_type == 'geodesic' else matrix_operations
    return df_series.apply(operation, coordinates=to_coordinates)

In [None]:
# Numeric features of the flat. float() evaluates to 0.0, so these are placeholder values.
data = {'floor_area_sqm': float(),
        'remaining_lease': float(),
        'avg_storey': float()}

# Inputs passed to the mean encoder (kept separate from the numeric features).
for_mean_encoding = pd.DataFrame({'town': 'Bukit Batok',
                                  'rooms': float()},
                                 index=[0])

df = pd.DataFrame(data, index=[0])

# Calculate distance to marina bay through OneMap API call
try:
    # NOTE(review): '' is a placeholder. distance_to calls `.apply` on its first
    # argument, so a pandas Series of [latitude, longitude] entries for the flat
    # must be supplied here before this lookup can succeed.
    df['dist_to_marina_bay'] = distance_to('', 'Marina Bay', verbose=0)
except Exception as error:
    print('Unable to get location of address given, please try again.')
    # Surface the underlying cause instead of discarding it.
    print(f'Details: {error!r}')
else:
    # Only continue once the distance feature exists; scaling and predicting
    # without that column would operate on an incomplete feature frame.

    # Mean encoding
    df['mean_encoded'] = mean_encoder.transform(for_mean_encoding)
    df = scaler.transform(df)

    # Prediction
    try:
        prediction = int(model.predict(df)[0])
    except ValueError:
        print('No such type of flat found in Town specified, please try again.')
    else:
        # Printed only on success, so `prediction` is always defined here.
        print(f'Prediction: SGD{prediction}')