In [1]:
# data manipulation
import numpy as np
import pandas as pd

# modeling utilities
from sklearn import metrics
from sklearn import preprocessing
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import GridSearchCV, cross_val_score, cross_val_predict, train_test_split


# plotting libraries
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
# Read the three CSV inputs (paths are relative to the working directory),
# in the same order as the names on the left-hand side.
train_df, test_df, validate = (
    pd.read_csv(csv_name)
    for csv_name in ('train_df.csv', 'test_df.csv', 'validate.csv')
)

In [4]:
# Display the column labels of the training frame (shown as the cell output).
train_df.columns

Index(['temp', 'humidity', 'windspeed', 'hour', 'weekday', 'month', 'year',
       'season_1', 'season_2', 'season_3', 'season_4', 'is_holiday_0',
       'is_holiday_1', 'weather_condition_1', 'weather_condition_2',
       'weather_condition_3', 'weather_condition_4', 'is_workingday_0',
       'is_workingday_1', 'total_count'],
      dtype='object')

In [5]:
# Display the column labels of test_df; per the recorded output it carries the
# same columns as train_df except for 'total_count'.
test_df.columns

Index(['temp', 'humidity', 'windspeed', 'hour', 'weekday', 'month', 'year',
       'season_1', 'season_2', 'season_3', 'season_4', 'is_holiday_0',
       'is_holiday_1', 'weather_condition_1', 'weather_condition_2',
       'weather_condition_3', 'weather_condition_4', 'is_workingday_0',
       'is_workingday_1'],
      dtype='object')

In [6]:
# Display the column labels of validate; per the recorded output it holds only
# the 'total_count' column.
validate.columns

Index(['total_count'], dtype='object')

In [7]:
# Separate the 'total_count' target from the remaining predictor columns.
y = train_df['total_count']
X = train_df.drop(columns=['total_count'])

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [8]:
# Create a Decision Tree Regressor.
# random_state is pinned because scikit-learn randomly permutes the candidate
# features at every split; when several splits improve the criterion equally,
# an unseeded tree can differ between runs, and so can the metrics reported
# below. The seed matches the one used for train_test_split.
regressor = DecisionTreeRegressor(random_state=42)

# Train the model on the training portion of the split
regressor.fit(X_train, y_train)

# Make predictions on the held-out portion of the split
y_pred = regressor.predict(X_test)

In [9]:
# Report regression metrics for the held-out split. The mean squared error is
# computed once and reused to derive the RMSE.
print('Mean Absolute Error:', metrics.mean_absolute_error(y_test, y_pred))
holdout_mse = metrics.mean_squared_error(y_test, y_pred)
print('Mean Squared Error:', holdout_mse)
print('Root Mean Squared Error:', np.sqrt(holdout_mse))
print('r2_score:', metrics.r2_score(y_test, y_pred))

Mean Absolute Error: 36.9162730785745
Mean Squared Error: 3967.4040360669815
Root Mean Squared Error: 62.987332346012096
r2_score: 0.8756247414820431


In [10]:
# Predict on the separately loaded test_df features.
validate_test = regressor.predict(test_df)

# Score those predictions against the reference values in `validate`. The mean
# squared error is computed once and reused to derive the RMSE.
print('Mean Absolute Error:', metrics.mean_absolute_error(validate, validate_test))
validation_mse = metrics.mean_squared_error(validate, validate_test)
print('Mean Squared Error:', validation_mse)
print('Root Mean Squared Error:', np.sqrt(validation_mse))
print('r2_score:', metrics.r2_score(validate, validate_test))

Mean Absolute Error: 35.863929567642955
Mean Squared Error: 3698.9445170850768
Root Mean Squared Error: 60.818948668035006
r2_score: 0.8840508979513882
