In [165]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import copy
import seaborn as sns
import tensorflow as tf
from sklearn.linear_model import LinearRegression

In [118]:
df = pd.read_csv("bike_weather_42_2023-03-01_2023-03-31.csv")
df.head() 

Unnamed: 0,station_number,s_date,s_time,available_bikes,available_bike_stands,datetime,w_date,w_time,weather_main,temperature,humidity,wind_speed
0,42,03/03/2023,10:47:16,15,15,03/03/2023 10:47,03/03/2023,10:53:05,Drizzle,279.57,83.0,5.66
1,42,03/03/2023,10:57:55,14,16,03/03/2023 10:57,03/03/2023,10:55:54,Drizzle,279.57,83.0,5.66
2,42,03/03/2023,10:57:55,14,16,03/03/2023 10:57,03/03/2023,10:55:54,Drizzle,279.57,83.0,5.66
3,42,03/03/2023,11:06:40,13,17,03/03/2023 11:06,03/03/2023,11:08:05,Drizzle,279.75,83.0,5.66
4,42,03/03/2023,11:06:40,13,17,03/03/2023 11:06,03/03/2023,11:08:05,Drizzle,279.75,83.0,5.66


# Changing the weather_main value 

In [119]:
df["weather_main"].unique()

array(['Drizzle', 'Clouds', 'Rain', 'Clear', 'Overcast', 'Rain, Overcast',
       'Snow, Rain, Overcast', 'Partially cloudy',
       'Rain, Partially cloudy', 'Snow, Rain, Partially cloudy'],
      dtype=object)

In [120]:
weather_mapping = {"Drizzle": "Rain", 
                   "Rain": "Rain", 
                   "Snow, Rain, Overcast": "Partially Rain", 
                   "Clouds": "Cloudy", 
                   "Overcast": "Cloudy", 
                   "Snow Rain Overcast": "Partially Rain",
                   "Rain, Overcast":"Partially rain",
                   "Snow, Rain, Partially cloudy": "Partially Rain", 
                   "Clear": "Sunny", 
                   "Partially cloudy": "Partially cloudy", 
                   "Rain, Partially cloudy": "Partially cloudy"}
df["weather_main"] = df["weather_main"].replace(weather_mapping)

In [121]:
df["weather_main"].unique()


array(['Rain', 'Cloudy', 'Sunny', 'Partially rain', 'Partially Rain',
       'Partially cloudy'], dtype=object)

# Creating a column for the bike availability (%)

In [122]:
df["availability"] = df["available_bikes"]/(df["available_bikes"] + df["available_bike_stands"])
df["availability"].round(2)

0       0.50
1       0.47
2       0.47
3       0.43
4       0.43
        ... 
8186    0.97
8187    0.97
8188    0.97
8189    0.97
8190    0.97
Name: availability, Length: 8191, dtype: float64

# Deleting the duplicate time values

In [123]:
df = df.drop(["s_date", "s_time", "w_date", "w_time"], axis = 1)

# Comverting the temparature to 

In [124]:
df.loc[df["temperature"] >= 200, "temperature"] -= 273

In [125]:
df["temperature"]

0       6.57
1       6.57
2       6.57
3       6.75
4       6.75
        ... 
8186    9.30
8187    9.30
8188    9.30
8189    9.30
8190    9.30
Name: temperature, Length: 8191, dtype: float64

# Splitting the data time


In [126]:
df["datetime"] = pd.to_datetime(df["datetime"])
df["year"] = df["datetime"].dt.year
df["month"] = df["datetime"].dt.month
df["day"] = df["datetime"].dt.day
df["time"] = df["datetime"].dt.time
df["hour"] = df["datetime"].dt.round("H").dt.hour.astype(int)
df["day_of_week"] = df["datetime"].dt.strftime('%A')



In [127]:
df.head()

Unnamed: 0,station_number,available_bikes,available_bike_stands,datetime,weather_main,temperature,humidity,wind_speed,availability,year,month,day,time,hour,day_of_week
0,42,15,15,2023-03-03 10:47:00,Rain,6.57,83.0,5.66,0.5,2023,3,3,10:47:00,11,Friday
1,42,14,16,2023-03-03 10:57:00,Rain,6.57,83.0,5.66,0.466667,2023,3,3,10:57:00,11,Friday
2,42,14,16,2023-03-03 10:57:00,Rain,6.57,83.0,5.66,0.466667,2023,3,3,10:57:00,11,Friday
3,42,13,17,2023-03-03 11:06:00,Rain,6.75,83.0,5.66,0.433333,2023,3,3,11:06:00,11,Friday
4,42,13,17,2023-03-03 11:06:00,Rain,6.75,83.0,5.66,0.433333,2023,3,3,11:06:00,11,Friday


# One-hot encoding for the categorical features

In [128]:
one_hot = pd.get_dummies(df["day_of_week"])
df = pd.concat([df, one_hot], axis = 1)

In [129]:
one_hot = pd.get_dummies(df["weather_main"])
df = pd.concat([df, one_hot], axis = 1)

In [130]:
df.head()

Unnamed: 0,station_number,available_bikes,available_bike_stands,datetime,weather_main,temperature,humidity,wind_speed,availability,year,...,Sunday,Thursday,Tuesday,Wednesday,Cloudy,Partially Rain,Partially cloudy,Partially rain,Rain,Sunny
0,42,15,15,2023-03-03 10:47:00,Rain,6.57,83.0,5.66,0.5,2023,...,0,0,0,0,0,0,0,0,1,0
1,42,14,16,2023-03-03 10:57:00,Rain,6.57,83.0,5.66,0.466667,2023,...,0,0,0,0,0,0,0,0,1,0
2,42,14,16,2023-03-03 10:57:00,Rain,6.57,83.0,5.66,0.466667,2023,...,0,0,0,0,0,0,0,0,1,0
3,42,13,17,2023-03-03 11:06:00,Rain,6.75,83.0,5.66,0.433333,2023,...,0,0,0,0,0,0,0,0,1,0
4,42,13,17,2023-03-03 11:06:00,Rain,6.75,83.0,5.66,0.433333,2023,...,0,0,0,0,0,0,0,0,1,0


# Changing the order of the columns

In [131]:
df.columns

Index(['station_number', 'available_bikes', 'available_bike_stands',
       'datetime', 'weather_main', 'temperature', 'humidity', 'wind_speed',
       'availability', 'year', 'month', 'day', 'time', 'hour', 'day_of_week',
       'Friday', 'Monday', 'Saturday', 'Sunday', 'Thursday', 'Tuesday',
       'Wednesday', 'Cloudy', 'Partially Rain', 'Partially cloudy',
       'Partially rain', 'Rain', 'Sunny'],
      dtype='object')

In [132]:
df = df[['station_number', 'available_bikes', 'available_bike_stands',
       'weather_main', 'temperature', 'humidity', 'wind_speed','Cloudy',
       'Partially Rain', 'Partially cloudy', 'Partially rain', 'Rain', 'Sunny',
       'datetime', 'year', 'month', 'day', 'time','hour', 'day_of_week',
       'Monday', 'Tuesday','Wednesday','Thursday','Friday','Saturday', 'Sunday', 
       'availability']]

In [133]:
#popped_weather_main = df.pop("weather_main")
#df.insert(8, "weather", popped_weather_main)

In [134]:
#popped_datetime = df.pop("datetime")
#df.insert(df.columns.get_loc('year') - 1, "datetime", popped_datetime)

In [135]:
#popped_availability = df.pop("availability")
#df.insert(len(df.columns), "availability", popped_availability)

In [136]:
df.head()

Unnamed: 0,station_number,available_bikes,available_bike_stands,weather_main,temperature,humidity,wind_speed,Cloudy,Partially Rain,Partially cloudy,...,hour,day_of_week,Monday,Tuesday,Wednesday,Thursday,Friday,Saturday,Sunday,availability
0,42,15,15,Rain,6.57,83.0,5.66,0,0,0,...,11,Friday,0,0,0,0,1,0,0,0.5
1,42,14,16,Rain,6.57,83.0,5.66,0,0,0,...,11,Friday,0,0,0,0,1,0,0,0.466667
2,42,14,16,Rain,6.57,83.0,5.66,0,0,0,...,11,Friday,0,0,0,0,1,0,0,0.466667
3,42,13,17,Rain,6.75,83.0,5.66,0,0,0,...,11,Friday,0,0,0,0,1,0,0,0.433333
4,42,13,17,Rain,6.75,83.0,5.66,0,0,0,...,11,Friday,0,0,0,0,1,0,0,0.433333


# Changing the data type

In [144]:
df.dtypes

station_number                    int64
available_bikes                   int64
available_bike_stands             int64
weather_main                     object
temperature                     float64
humidity                        float64
wind_speed                      float64
Cloudy                            uint8
Partially Rain                    uint8
Partially cloudy                  uint8
Partially rain                    uint8
Rain                              uint8
Sunny                             uint8
datetime                 datetime64[ns]
year                              int64
month                             int64
day                               int64
time                             object
hour                              int64
day_of_week                      object
Monday                            uint8
Tuesday                           uint8
Wednesday                         uint8
Thursday                          uint8
Friday                            uint8


In [145]:
for column in df.columns:
    if df[column].dtype == "uint8":
        df[column] = df[column].astype("int64")

In [151]:
train, valid, test =  np.split(df.sample(frac = 1), [int(0.6*len(df)), int(0.8*len(df))])

In [152]:
train

Unnamed: 0,station_number,available_bikes,available_bike_stands,weather_main,temperature,humidity,wind_speed,Cloudy,Partially Rain,Partially cloudy,...,hour,day_of_week,Monday,Tuesday,Wednesday,Thursday,Friday,Saturday,Sunday,availability
6138,42,8,22,Partially cloudy,7.50,86.06,27.80,0,0,1,...,20,Friday,0,0,0,0,1,0,0,0.266667
3768,42,10,20,Partially cloudy,14.10,75.73,22.70,0,0,1,...,14,Thursday,0,0,0,1,0,0,0,0.333333
3568,42,24,5,Partially cloudy,10.30,90.77,15.80,0,0,1,...,21,Wednesday,0,0,1,0,0,0,0,0.827586
1384,42,22,8,Cloudy,3.58,64.00,6.17,1,0,0,...,7,Thursday,0,0,0,1,0,0,0,0.733333
2885,42,6,24,Partially cloudy,12.60,81.90,29.70,0,0,1,...,12,Monday,1,0,0,0,0,0,0,0.200000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7790,42,1,29,Partially cloudy,13.40,79.12,15.30,0,0,1,...,15,Thursday,0,0,0,1,0,0,0,0.033333
5737,42,1,29,Partially cloudy,11.60,67.60,29.00,0,0,1,...,11,Thursday,0,0,0,1,0,0,0,0.033333
6846,42,25,5,Sunny,-3.20,94.51,9.80,0,0,0,...,7,Monday,1,0,0,0,0,0,0,0.833333
4066,42,29,1,Partially cloudy,11.40,85.43,17.60,0,0,1,...,15,Friday,0,0,0,0,1,0,0,0.966667


In [154]:
train_x = train[['temperature', 'humidity', 'wind_speed','Cloudy',
       'Partially Rain', 'Partially cloudy', 'Partially rain', 'Rain', 'Sunny',
        'year', 'month', 'day', 'hour',
       'Monday', 'Tuesday','Wednesday','Thursday','Friday','Saturday', 'Sunday']]
train_y = train[["availability"]]

In [155]:
train.dtypes

station_number                    int64
available_bikes                   int64
available_bike_stands             int64
weather_main                     object
temperature                     float64
humidity                        float64
wind_speed                      float64
Cloudy                            int64
Partially Rain                    int64
Partially cloudy                  int64
Partially rain                    int64
Rain                              int64
Sunny                             int64
datetime                 datetime64[ns]
year                              int64
month                             int64
day                               int64
time                             object
hour                              int64
day_of_week                      object
Monday                            int64
Tuesday                           int64
Wednesday                         int64
Thursday                          int64
Friday                            int64


# kNN

In [158]:
#from sklearn.neighbors import KNeighborsClassifier

In [159]:
#knn_model =  KNeighborsClassifier(n_neighbors = 1)
#knn_model.fit(train_x, train_y)

# Linear regression