In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from xgboost import XGBRegressor

In [None]:
# Load the London bike-sharing CSV into a DataFrame.
csv_path = "../input/london-bike-sharing-dataset/london_merged.csv"
data = pd.read_csv(csv_path)

In [None]:
# Display the freshly loaded DataFrame as this cell's output.
data

In [None]:
# Print pandas' built-in summary of the frame (`DataFrame.info`).
data.info() 

In [None]:
# Distinct-value count per column as a {column: count} dict.
# dropna=False keeps NaN counted as a value, matching len(Series.unique()).
data.nunique(dropna=False).to_dict()

In [None]:
# Convert datetime columns
#
# Parse the raw `timestamp` column once, derive month / day-of-month / hour
# features from it with the vectorised `.dt` accessor (instead of a Python
# lambda per row), then drop the original column. The three new columns are
# appended after the existing ones, in the order month, day, hour.
# NOTE: the cell consumes `timestamp`, so re-running it without reloading
# `data` raises a KeyError.

timestamps = pd.to_datetime(data["timestamp"])
data = data.assign(
    month=timestamps.dt.month,
    day=timestamps.dt.day,
    hour=timestamps.dt.hour,
).drop(columns="timestamp")

In [None]:
# Preview the first five rows to check the new month/day/hour columns.
data.head(n=5)

In [None]:
# One-hot encoding
#
# Replace `weather_code` with one indicator column per distinct code.
# The indicator columns are named "weather_<code>" and are appended after the
# remaining columns; the original `weather_code` column is removed.

data = pd.get_dummies(data, columns=["weather_code"], prefix="weather")

In [None]:
# Preview the first five rows to check the one-hot encoded weather columns.
data.head(n=5)

In [None]:
# Splitting & scaling the data
#
# `cnt` is the regression target; every other column is a feature.

X = data.drop("cnt", axis=1)
y = data["cnt"]

# Split BEFORE scaling so the held-out rows never contribute to the scaler's
# mean/variance (fitting the scaler on the full data leaks test-set
# statistics into preprocessing). Same seed and sizes as before, so the
# train/test membership is unchanged.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.75, shuffle=True, random_state=42
)

# Fit the scaler on the training split only, then apply the same transform to
# both splits. Results are wrapped back into DataFrames to keep the row index
# and column names. `X` itself is left unscaled.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = pd.DataFrame(scaler.transform(X_train), index=X_train.index, columns=X_train.columns)
X_test = pd.DataFrame(scaler.transform(X_test), index=X_test.index, columns=X_test.columns)


In [None]:
# Modelling and training
#
# XGBoost regressor constructed with its default hyperparameters and fitted
# on the training split. The fit call is the last expression so the fitted
# estimator is shown as the cell output.

model = XGBRegressor()
model.fit(X=X_train, y=y_train)

In [None]:
# Evaluate the fitted model on the held-out split with its built-in `score`.
model.score(X=X_test, y=y_test)

In [None]:
# Predict on the held-out features; `y_preds` feeds the metric and plot cells below.
y_preds = model.predict(X_test) 

In [None]:
# Echo the predictions as the cell output for a quick look at their values.
y_preds 

In [None]:
# RMSE
#
# Root of the mean squared difference between actual and predicted values
# on the held-out split.
residuals = y_test - y_preds
rmse = np.sqrt(np.mean(np.square(residuals)))
rmse_template = "RMSE is: {:.2f}"
print(rmse_template.format(rmse))

In [None]:
# r2 score
#
# R² of the predictions against the held-out targets.
# `r2_score` is imported in the notebook's top import cell rather than here,
# so all dependencies are visible in one place.

r2 = r2_score(y_test, y_preds)
print("R2 score is: {:.2f}".format(r2))

In [None]:
# Plotting y_test vs y_preds
#
# Scatter with predictions on the x-axis and actual values on the y-axis,
# drawn on an explicit Axes object.

fig, ax = plt.subplots()
ax.scatter(y_preds, y_test)
ax.set_xlabel("Predicted")
ax.set_ylabel("Actual")
ax.set_title("Actual Vs Predicted Values")