In [11]:
import pandas as pd 
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.ensemble import RandomForestRegressor



# Load the three datasets; these frames keep their 'x-axis' / 'y-axis' columns.
xy_ice_mask = pd.read_csv('data/ice_mask.csv')
xy_ice_thickness = pd.read_csv('data/ice_thickness.csv')
xy_ice_velocity = pd.read_csv('data/ice_velocity.csv')

# Derive coordinate-free copies of each frame by dropping the two axis columns.
ice_mask, ice_thickness, ice_velocity = (
    frame.drop(columns=['x-axis', 'y-axis'])
    for frame in (xy_ice_mask, xy_ice_thickness, xy_ice_velocity)
)


def split_data(df, test_size = 0.2, random_state=None):
    """Split *df* into train/test features and targets.

    Every column except the last is treated as a feature; the last column
    is the target.

    Args:
        df: DataFrame holding the feature columns followed by the target.
        test_size: Forwarded to ``train_test_split`` as ``test_size``.
        random_state: Forwarded to ``train_test_split`` as ``random_state``.

    Returns:
        A 4-tuple ``(x_train, x_test, y_train, y_test)``.
    """
    features = df.iloc[:, :-1]
    target = df.iloc[:, -1]

    # train_test_split yields the four pieces in exactly the order returned.
    return tuple(
        train_test_split(
            features, target, test_size=test_size, random_state=random_state
        )
    )


# linear regression 
def linearRegression(df, test_size=0.2, random_state=None):
    """Fit a linear regression on *df* and return its test-set MSE.

    The data is split by ``split_data``, which uses every column but the
    last as features and the last column as the target.

    Args:
        df: DataFrame holding the feature columns followed by the target.
        test_size: Passed through to ``split_data``. The default of 0.2
            matches ``split_data``'s own default.
        random_state: Seed passed through to ``split_data``. With the
            default ``None`` each call draws a fresh random split, so the
            score varies between runs; pass an integer to make the score
            reproducible.

    Returns:
        The mean squared error of the model's predictions on the test split.
    """
    x_train, x_test, y_train, y_test = split_data(
        df, test_size=test_size, random_state=random_state
    )

    model = LinearRegression()
    model.fit(x_train, y_train)

    y_predicted = model.predict(x_test)
    return mean_squared_error(y_test, y_predicted)

# non linear regression -> Random Forest 
def randomForest(df, test_size=0.2, random_state=None):
    """Fit a random forest regressor on *df* and return its test-set MSE.

    The data is split by ``split_data``, which uses every column but the
    last as features and the last column as the target.

    Args:
        df: DataFrame holding the feature columns followed by the target.
        test_size: Passed through to ``split_data``. The default of 0.2
            matches ``split_data``'s own default.
        random_state: Seed used for both the train/test split and the
            forest itself. With the default ``None`` both are randomised
            on every call, so the score varies between runs; pass an
            integer to make the score reproducible.

    Returns:
        The mean squared error of the model's predictions on the test split.
    """
    x_train, x_test, y_train, y_test = split_data(
        df, test_size=test_size, random_state=random_state
    )

    # Seed the estimator as well: the forest's own randomness would
    # otherwise still change the score even with a fixed split.
    model = RandomForestRegressor(random_state=random_state)
    model.fit(x_train, y_train)

    y_predicted = model.predict(x_test)
    return mean_squared_error(y_test, y_predicted)

def printing():
    """Print the test MSE for every model/dataset combination.

    Four sections are printed in order: linear regression without and with
    the x/y coordinate columns, then random forest without and with them.
    Each section lists Ice Mask, Ice Thickness and Ice Velocity, formatted
    with thousands separators and three decimals.
    """
    without_xy = (
        ("Ice Mask", ice_mask),
        ("Ice Thickness", ice_thickness),
        ("Ice Velocity", ice_velocity),
    )
    with_xy = (
        ("Ice Mask", xy_ice_mask),
        ("Ice Thickness", xy_ice_thickness),
        ("Ice Velocity", xy_ice_velocity),
    )
    sections = (
        ("NO XY LINEAR REGRESSION RESULTS", linearRegression, without_xy),
        ("XY LINEAR REGRESSION RESULTS", linearRegression, with_xy),
        ("NO XY RANDOM FOREST RESULTS", randomForest, without_xy),
        ("XY RANDOM FOREST RESULTS", randomForest, with_xy),
    )

    for heading, evaluate, datasets in sections:
        print(heading)
        for label, frame in datasets:
            # Each model is trained and scored right before its line is printed.
            print(f"{label}: {evaluate(frame):,.3f}")


# Train and score every model/dataset combination and print the MSE report.
printing()



NO XY LINEAR REGRESSION RESULTS
Ice Mask:  0.22344310796555156
Ice Thickness:  192725.810579041
Ice Velocity:  33015.44013523999
XY LINEAR REGRESSION RESULTS
Ice Mask: 0.173
Ice Thickness:  156564.16193682686
Ice Velocity:  42405.52238527415
NO XY RANDOM FOREST RESULTS
Ice Mask:  0.0990826437149944
Ice Thickness:  97525.74696064829
Ice Velocity:  32885.28618272632
XY RANDOM FOREST RESULTS
Ice Mask:  0.04364866304588226
Ice Thickness:  65307.243650973964
Ice Velocity:  18564.408645057465
