In [None]:
# This is just a little Jupyter notebook to try some of the techniques I have been learning recently.

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split, KFold, cross_val_predict

# Load the weather dataset and summarise it.
# NOTE(review): the path is relative, so 'weatherHistory.csv' must be in the
# working directory — confirm where the notebook is run from.
weather_history = pd.read_csv('weatherHistory.csv')

# info() prints its own report (columns, non-null counts, dtypes).
weather_history.info()
# describe() only *returns* a summary table; print it so the statistics are
# shown even when this is not the last expression of a notebook cell or the
# code is run as a plain script.
print(weather_history.describe())

# Now let's visualise some of that data


%matplotlib inline
from IPython.display import display
# Columns to compare against each other in the pair plot.
useful_vars = [
    'Temperature (C)',
    'Apparent Temperature (C)',
    'Humidity',
    'Wind Speed (km/h)',
    'Wind Bearing (degrees)',
    'Visibility (km)',
    'Pressure (millibars)',
]
# Draw the grid of pairwise plots for the selected columns.
sns.pairplot(weather_history, vars=useful_vars)

# Actual and apparent temperature look correlated (unsurprisingly); plot one
# against the other to verify.
# Start a fresh figure: plt.scatter draws on the *current* axes, which would
# otherwise be a panel of a previously created plot when everything runs in
# one cell or as a script.
plt.figure()
plt.scatter(weather_history['Temperature (C)'], weather_history['Apparent Temperature (C)'])
plt.xlabel('Temperature (C)')
plt.ylabel('Apparent Temperature (C)')
plt.show()

# Suspicions confirmed! Build a small model that predicts apparent temperature
# from actual temperature.

# Features are reshaped into a single-column 2-D array; the target stays 1-D.
X = weather_history["Temperature (C)"].to_numpy().reshape(-1, 1)
y = weather_history["Apparent Temperature (C)"].to_numpy()

# [1 Mark] - Perform Split: 80% train / 20% test, seeded for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.8,
    test_size=0.2,
    random_state=0,
)


# Prepare the cross-validation splitter: 5 shuffled folds with a fixed seed.
cv = KFold(n_splits=5, shuffle=True, random_state=0)

# [Brownie Points] Visualise Folds: print the train/validation row positions
# of every fold. Note the folds are taken over the whole weather_history
# frame, not over the training split.
fold_report = ('Fold {}:\nTrain ids: {}\nValid ids: {}\n'
               '\n----------------------------------------\n')
for ii, (train_idx, valid_idx) in enumerate(cv.split(weather_history)):
    print(fold_report.format(ii, train_idx, valid_idx))

# Fit a linear regression on the training split.
lm = LinearRegression()
lm.fit(X_train, y_train)

# [1 Mark] - Report Training Data: print the fitted slope and intercept.
# coef_ is an array (one entry per feature; a single feature is fitted here),
# so take its first element instead of calling float() on the array —
# converting a 1-D array to a scalar is deprecated since NumPy 1.25.
print('Apparent Temperature = {:.3f} * temperature + {:.3f}'.format(lm.coef_[0], lm.intercept_))


# Testing: predict apparent temperature for the held-out rows.
y_pred = lm.predict(X_test)

# [2 Marks] Plot both Scatters: the measured test values and the model's
# predictions over the same temperatures.
plt.figure()  # own figure, so the points do not land on an earlier plot's axes
plt.scatter(X_test, y_test, label='Test Data')
plt.scatter(X_test, y_pred, color='r', marker='+', s=50, label='LR pred.')
plt.xlabel('Temperature (C)')
plt.ylabel('Apparent Temperature (C)')
plt.legend()  # without this call the label= arguments are never displayed
plt.show()

# And there's our cool graph after we have implemented linear regression, and tested it.