In [1]:
import tensorflow as tf

In [2]:
%matplotlib inline 

import timeit
import warnings

# Silence warnings up front so the imports below do not flood the output.
warnings.filterwarnings('ignore')

import keras
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, make_scorer
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

try:
    from sklearn.model_selection import train_test_split
except ImportError:  # scikit-learn < 0.18 only provides the legacy module
    from sklearn.cross_validation import train_test_split

# Notebook-wide display defaults.
pd.options.display.max_columns = None  # display all columns
sns.set_style('whitegrid')


In [3]:

# Read the training and test CSVs from the relative ../input directory.
bikes, bikes_test = (
    pd.read_csv(csv_path)
    for csv_path in ('../input/train.csv', '../input/test.csv')
)

In [4]:
# Preview the first rows of the freshly loaded training frame.
bikes.head()

In [5]:
# Keep the test-set timestamps under their own name.
dt = bikes_test['datetime']
# Rename the 'count' column to 'total' on the training frame.
bikes.rename(columns=dict(count='total'), inplace=True)

In [6]:
# Preview the frame again after the 'count' -> 'total' rename.
bikes.head()

In [7]:
# Parse the timestamp column of each split once.
temp = pd.DatetimeIndex(bikes['datetime'])
temp_t = pd.DatetimeIndex(bikes_test['datetime'])

# Derive the same calendar features, in the same column order, on both frames.
for frame, stamps in ((bikes, temp), (bikes_test, temp_t)):
    for part in ("dayofweek", "hour", "month", "year"):
        frame[part] = getattr(stamps, part)

In [8]:
# Keep the 'total' column (the renamed 'count') under its own name.
bikes_total = bikes["total"] 

In [9]:
# Preview the first rows of the extracted totals.
bikes_total.head()

In [10]:
# Remove the 'holiday' and 'workingday' columns from the training frame.
bikes = bikes.drop(labels=['holiday', 'workingday'], axis=1)

In [11]:
# Preview the totals once more after the column drop on `bikes`.
bikes_total.head()

In [12]:
# Edges of the ride-count buckets. pd.cut builds right-closed intervals
# (0, 15], (15, 30], ..., (90, 10000]; totals outside (0, 10000] become NaN.
count_bins = [0, 15, 30, 45, 60, 75, 90, 10000]
count_labels = ["very low", "low", "fair", "high", "very high", "super high", "amazing"]

# Bucket the totals and expose the result as a one-column frame named 'range'.
category = pd.cut(bikes.total, count_bins, labels=count_labels).to_frame(name='range')

# Pair each total with its bucket label.
bikes_range = pd.concat([bikes.total, category], axis=1)


In [13]:
# Check what kind of object `category` is after the to_frame() conversion.
type(category)


In [14]:
# Preview the total / range pairs.
bikes_range.head()

In [15]:
# Remove 'total' from the frame; `bikes_range` still carries it next to its bucket label.
bikes = bikes.drop('total', axis=1)

In [16]:
# Preview the frame now that 'total' has been dropped.
bikes.head()

In [17]:
# Append the 'total' and 'range' columns of `bikes_range` side by side with the features.
bikes = pd.concat((bikes, bikes_range), axis=1)

In [18]:
# Print the describe() summary statistics of the frame as plain text.
print(bikes.describe())

In [19]:
# Treat zero wind-speed / humidity readings as missing so the dropna() step
# can discard those rows.
# 'dayofweek' must not be part of this replacement: pandas encodes Monday as 0,
# so turning 0 into NaN would throw away every Monday observation.
# np.nan is used because the np.NaN alias no longer exists in NumPy 2.
sensor_cols = ['windspeed', 'humidity']
bikes[sensor_cols] = bikes[sensor_cols].replace(0, np.nan)

In [20]:
# Preview the frame after zeros were replaced with NaN.
bikes.head()

In [21]:
# Discard every row that contains at least one missing value.
bikes = bikes.dropna(axis=0, how='any')

In [22]:
# Show dtypes and non-null counts after the rows with missing values were dropped.
bikes.info()

In [23]:
# Preview the cleaned frame.
bikes.head()

In [24]:
# Work on an independent copy so the encoding steps leave `bikes` untouched.
bikes_cl = bikes.copy(deep=True)
# Separate copy of the bucket labels.
target = bikes_cl.loc[:, 'range'].copy()

In [25]:
# Columns to expand into indicator columns named '<column>#<value>'.
categorial_cols = ['season', 'weather']

for cc in categorial_cols:
    # Build the indicator columns, then swap them in for the original column.
    dummies = pd.get_dummies(bikes_cl[cc]).add_prefix("{}#".format(cc))
    bikes_cl = bikes_cl.drop(cc, axis=1).join(dummies)

In [26]:
# Pairwise correlations of the numeric columns only. The frame also holds the
# string 'datetime' column and the categorical 'range' column, which corr()
# cannot convert on pandas >= 2.0, so they are filtered out explicitly.
bikes.select_dtypes(include=[np.number]).corr()

In [27]:
# Heatmap of the numeric-column correlations. Non-numeric columns ('datetime',
# 'range') are excluded first because corr() cannot convert them on pandas >= 2.0.
# seaborn is already imported as `sns` in the notebook's import cell.
sns.heatmap(bikes.select_dtypes(include=[np.number]).corr())

In [36]:
# Columns that are not model inputs: the raw counts, the timestamp string and the label.
non_feature_cols = ['casual', 'registered', 'total', 'datetime', 'range']

# Feature matrix and classification target (the count bucket).
X = bikes_cl.drop(non_feature_cols, axis=1)
y = bikes_cl.loc[:, "range"]

X.info()

In [37]:
# Preview the feature matrix.
X.head()

In [38]:
# Preview the target labels.
y.head()

In [39]:
# Hold out 30% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=42)

# Seed the forest as well so reruns build the same trees and report the same scores.
rfc = RandomForestClassifier(random_state=42)

# Start the clock before fitting so the reported time covers training and prediction.
start_time = timeit.default_timer()
rfc.fit(X_train, y_train)
rfc_pred = rfc.predict(X_test)
print("Time: ", timeit.default_timer() - start_time)

# Making the Confusion Matrix
rfccm = confusion_matrix(y_test, rfc_pred)
print("\t\t\t\t\t---SKlearn Random Forest---")
print("confusion_matrix:\n", rfccm)
print("accuracy_score: ", accuracy_score(y_test, rfc_pred))

In [40]:
# 3-nearest-neighbours classifier evaluated on the shared train/test split.
knn = KNeighborsClassifier(n_neighbors=3)

# Start the clock before fitting so the reported time covers training and prediction.
start_time = timeit.default_timer()
knn.fit(X_train, y_train)
knn_pred = knn.predict(X_test)
print("Time: ", timeit.default_timer() - start_time)

# Making the Confusion Matrix
knncm = confusion_matrix(y_test, knn_pred)
print("\t\t\t\t\t---SKlearn K-Nearest Neighbors---")
print("confusion_matrix:\n", knncm)
print("accuracy_score: ", accuracy_score(y_test, knn_pred))

In [41]:
# Gaussian naive Bayes classifier evaluated on the shared train/test split.
gnb = GaussianNB()

# Start the clock before fitting so the reported time covers training and prediction.
start_time = timeit.default_timer()
gnb.fit(X_train, y_train)
gnb_pred = gnb.predict(X_test)
print("Time: ", timeit.default_timer() - start_time)

# Making the Confusion Matrix
gnbcm = confusion_matrix(y_test, gnb_pred)
print("\t\t\t\t\t---SKlearn Gaussian Naive Bayes---")
print("confusion_matrix:\n", gnbcm)
print("accuracy_score: ", accuracy_score(y_test, gnb_pred))

In [42]:
# Decision tree evaluated on the shared train/test split; seeded so that
# reruns grow the same tree and report the same scores.
dtc = DecisionTreeClassifier(random_state=42)

# Start the clock before fitting so the reported time covers training and prediction.
start_time = timeit.default_timer()
dtc.fit(X_train, y_train)
dtc_pred = dtc.predict(X_test)
print("Time: ", timeit.default_timer() - start_time)

# Making the Confusion Matrix
dtccm = confusion_matrix(y_test, dtc_pred)
print("\t\t\t\t\t---SKlearn Decision Tree---")
print("confusion_matrix:\n", dtccm)
print("accuracy_score: ", accuracy_score(y_test, dtc_pred))

In [43]:
# Linear-kernel support vector classifier evaluated on the shared train/test split.
# No gamma is passed: it is the coefficient of the rbf/poly/sigmoid kernels and
# has no effect on a linear kernel.
svc = SVC(kernel='linear')

# Start the clock before fitting so the reported time covers training and prediction.
start_time = timeit.default_timer()
svc.fit(X_train, y_train)
svc_pred = svc.predict(X_test)
print("Time: ", timeit.default_timer() - start_time)

# Making the Confusion Matrix
svccm = confusion_matrix(y_test, svc_pred)
print("\t\t\t\t\t---SKLearn SVM---")
print("confusion_matrix:\n", svccm)
print("accuracy_score: ", accuracy_score(y_test, svc_pred))