### Importing Libraries


In [37]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.metrics import f1_score, log_loss, accuracy_score, jaccard_score

### Importing the Dataset


In [None]:
# Address of the weather CSV used by this notebook.
# NOTE(review): no visible cell reads `path`; the load cell defines its own
# `url` variable holding the same address.
path = (
    'https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-ML0101EN-SkillUp/labs/ML-FinalAssignment/Weather_Data.csv'
)

In [4]:

# Fetch the weather CSV over HTTPS and parse it into the raw DataFrame `df`.
url = (
    'https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-ML0101EN-SkillUp/labs/ML-FinalAssignment/Weather_Data.csv'
)
df = pd.read_csv(filepath_or_buffer=url)


In [6]:
# Preview the first five rows of the raw data (rich display as the cell's last expression).
df.head(n=5)

Unnamed: 0,Date,MinTemp,MaxTemp,Rainfall,Evaporation,Sunshine,WindGustDir,WindGustSpeed,WindDir9am,WindDir3pm,...,Humidity9am,Humidity3pm,Pressure9am,Pressure3pm,Cloud9am,Cloud3pm,Temp9am,Temp3pm,RainToday,RainTomorrow
0,2/1/2008,19.5,22.4,15.6,6.2,0.0,W,41,S,SSW,...,92,84,1017.6,1017.4,8,8,20.7,20.9,Yes,Yes
1,2/2/2008,19.5,25.6,6.0,3.4,2.7,W,41,W,E,...,83,73,1017.9,1016.4,7,7,22.4,24.8,Yes,Yes
2,2/3/2008,21.6,24.5,6.6,2.4,0.1,W,41,ESE,ESE,...,88,86,1016.7,1015.6,7,8,23.5,23.0,Yes,Yes
3,2/4/2008,20.2,22.8,18.8,2.2,0.0,W,41,NNE,E,...,83,90,1014.2,1011.8,8,8,21.4,20.9,Yes,Yes
4,2/5/2008,19.7,25.7,77.4,4.8,0.0,W,41,NNE,W,...,88,74,1008.3,1004.8,8,8,22.5,25.5,Yes,Yes


### Data Preprocessing


#### One Hot Encoding


In [8]:
# One-hot encode the four categorical columns listed below; every other column
# of `df` is carried over as-is into the new frame.
df_sydney_processed = pd.get_dummies(
    df,
    columns=[
        'RainToday',
        'WindGustDir',
        'WindDir9am',
        'WindDir3pm',
    ],
)

In [9]:
# Turn the remaining 'No'/'Yes' strings into 0/1 wherever they occur in the frame.
# Rebinding the name (rather than mutating in place) yields the same frame contents.
df_sydney_processed = df_sydney_processed.replace(
    to_replace=['No', 'Yes'],
    value=[0, 1],
)

### Training Data and Test Data


In [10]:
# Remove the 'Date' column before modelling.
# Like the original, this raises KeyError if the cell is run a second time,
# because 'Date' is already gone from the frame.
df_sydney_processed = df_sydney_processed.drop(columns='Date')

In [11]:
# Cast every column to 64-bit float so the estimators receive a purely numeric frame.
df_sydney_processed = df_sydney_processed.astype('float64')

In [12]:
# Separate the target column from the predictors.
Y = df_sydney_processed['RainTomorrow']                         # target
features = df_sydney_processed.drop(columns=['RainTomorrow'])   # everything else

### Linear Regression


In [13]:
# First split: 80% of the rows for training, the remainder for testing, seeded with 10.
# The linear regression, KNN and decision tree cells that follow use this partition.
(x_train, x_test, y_train, y_test) = train_test_split(
    features,
    Y,
    train_size=0.8,
    random_state=10,
)

In [None]:
# Fit an ordinary least-squares model on the training split.
# The fit call is kept as the last expression so the fitted estimator is displayed.
LinearReg = LinearRegression()
LinearReg.fit(X=x_train, y=y_train)

In [15]:
# Linear-regression outputs for the test split.
# `predictions` is a shared name: every model's predict cell rebinds it.
predictions = LinearReg.predict(X=x_test)

In [17]:
# Regression error metrics for whatever `predictions` currently holds; run the
# linear-regression predict cell immediately before this one.
LinearRegression_MAE = mean_absolute_error(y_true=y_test, y_pred=predictions)
LinearRegression_MSE = mean_squared_error(y_true=y_test, y_pred=predictions)
LinearRegression_R2 = r2_score(y_true=y_test, y_pred=predictions)

In [None]:
# Two-column summary (metric name, value) of the linear-regression scores.
# `Report` is rebound later by the final comparison table.
Report = pd.DataFrame(
    data={
        'Metric': ['MAE', 'MSE', 'R2'],
        'Value': [
            LinearRegression_MAE,
            LinearRegression_MSE,
            LinearRegression_R2,
        ],
    }
)

print(Report)

### KNN


In [None]:
# k-nearest-neighbours classifier with k = 4, trained on the current training split.
KNN = KNeighborsClassifier(n_neighbors=4)
KNN.fit(X=x_train, y=y_train)

In [21]:
# KNN class labels for the test split (rebinds the shared `predictions` name).
predictions = KNN.predict(X=x_test)

In [None]:
# Score whatever `predictions` currently holds against the test labels; run the
# KNN predict cell immediately before this one. The three KNN_* names are read
# again by the final report cell.
KNN_Accuracy_Score, KNN_JaccardIndex, KNN_F1_Score = (
    metric(y_test, predictions)
    for metric in (accuracy_score, jaccard_score, f1_score)
)

for label, score in (
    ("Accuracy: ", KNN_Accuracy_Score),
    ("Jaccard index: ", KNN_JaccardIndex),
    ("f1 score: ", KNN_F1_Score),
):
    print(label, score)

### Decision Tree


In [None]:
# Decision-tree classifier trained on the current training split.
# random_state is pinned so a Restart & Run All reproduces the same tree and
# therefore the same reported scores: scikit-learn permutes the candidate
# features at every split, so without a seed equally good splits can be
# resolved differently from run to run. The value 10 mirrors the seed of the
# train/test split this model is trained on.
Tree = DecisionTreeClassifier(random_state=10)
Tree.fit(x_train, y_train)

In [26]:
# Decision-tree class labels for the test split (rebinds the shared `predictions` name).
predictions = Tree.predict(X=x_test)

In [None]:
# Score whatever `predictions` currently holds against the test labels; run the
# decision-tree predict cell immediately before this one. The three Tree_*
# names are read again by the final report cell.
Tree_Accuracy_Score = accuracy_score(y_true=y_test, y_pred=predictions)
Tree_JaccardIndex = jaccard_score(y_true=y_test, y_pred=predictions)
Tree_F1_Score = f1_score(y_true=y_test, y_pred=predictions)

print("Accuracy: ", Tree_Accuracy_Score, sep=" ")
print("Jaccard index: ", Tree_JaccardIndex, sep=" ")
print("F1 score: ", Tree_F1_Score, sep=" ")

### Logistic Regression


In [28]:
# Second split: 20% of the rows held out for testing, seeded with 1.
# This rebinds x_train / x_test / y_train / y_test, so every cell executed
# after it (logistic regression, SVM) works on a different partition than the
# models fitted before it.
(x_train, x_test, y_train, y_test) = train_test_split(
    features,
    Y,
    test_size=0.2,
    random_state=1,
)

In [None]:
# Logistic-regression classifier using the liblinear solver, trained on the
# current training split.
LR = LogisticRegression(solver="liblinear")
LR.fit(X=x_train, y=y_train)

In [30]:
# Logistic-regression class labels for the test split (rebinds the shared `predictions` name).
predictions = LR.predict(X=x_test)

In [None]:
# Score the logistic-regression model on the test split.
# Accuracy, Jaccard and F1 are label-based, so they use the hard class labels
# in `predictions` (run the logistic-regression predict cell immediately
# before this one, since `predictions` is rebound by every model).
LR_Accuracy_Score = accuracy_score(y_test, predictions)
LR_JaccardIndex = jaccard_score(y_test, predictions)
LR_F1_Score = f1_score(y_test, predictions)

# Log loss measures the quality of predicted *probabilities*. Feeding it the
# hard 0/1 labels treats every prediction as 100% confident, which inflates
# the loss and ignores the model's calibration, so the per-class probabilities
# from predict_proba are used instead.
LR_Probabilities = LR.predict_proba(x_test)
LR_Log_Loss = log_loss(y_test, LR_Probabilities)

# Displaying the results
print("Accuracy: ", LR_Accuracy_Score)
print("Jaccard index: ", LR_JaccardIndex)
print("F1 score: ", LR_F1_Score)
print("Log Loss: ", LR_Log_Loss)

### SVM


In [None]:
# Support-vector classifier with scikit-learn's default settings, trained on
# the current training split.
SVM = SVC()
SVM.fit(X=x_train, y=y_train)

In [46]:
# SVM class labels for the test split (rebinds the shared `predictions` name).
predictions = SVM.predict(X=x_test)

In [None]:
# Score whatever `predictions` currently holds against the test labels; run the
# SVM predict cell immediately before this one. The three SVM_* names are read
# again by the final report cell.
#
# Jaccard and F1 use the default binary averaging (positive class = 1), the
# same setting as the KNN, decision-tree and logistic-regression cells, so the
# values sharing a row in the final report are directly comparable. With
# macro averaging the SVM figures would be a mean over both classes and could
# not be read against the other columns.
# NOTE(review): if the SVM predicts no positive samples these two scores are 0
# and scikit-learn emits an undefined-metric warning; that is the honest value
# for this comparison.
SVM_Accuracy_Score = accuracy_score(y_test, predictions)
SVM_JaccardIndex = jaccard_score(y_test, predictions)
SVM_F1_Score = f1_score(y_test, predictions)

print("SVM Accuracy Score: ", SVM_Accuracy_Score)
print("SVM Jaccard Index: ", SVM_JaccardIndex)
print("SVM F1 Score: ", SVM_F1_Score)

# Final Report

In [None]:
# Final comparison table: one row per metric, one column per model.
# None marks a metric that was not computed for that model. This rebinds
# `Report`, replacing the linear-regression-only table built earlier.
Report = pd.DataFrame(
    data={
        'Metric': ['MAE', 'MSE', 'R2', 'Accuracy', 'Jaccard Index', 'F1 Score', 'Log Loss'],
        # Regression metrics first, then four empty classification slots.
        'Linear': [LinearRegression_MAE, LinearRegression_MSE, LinearRegression_R2] + [None] * 4,
        # Three empty regression slots, then accuracy / Jaccard / F1 / log loss.
        'KNN': [None] * 3 + [KNN_Accuracy_Score, KNN_JaccardIndex, KNN_F1_Score, None],
        'Trees': [None] * 3 + [Tree_Accuracy_Score, Tree_JaccardIndex, Tree_F1_Score, None],
        'Logistic': [None] * 3 + [LR_Accuracy_Score, LR_JaccardIndex, LR_F1_Score, LR_Log_Loss],
        'SVM': [None] * 3 + [SVM_Accuracy_Score, SVM_JaccardIndex, SVM_F1_Score, None],
    }
)

# Plain-text rendering without the integer row index.
print(Report.to_string(index=False))