In [None]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
import seaborn as sb 
from sklearn.model_selection import train_test_split 
from sklearn.preprocessing import LabelEncoder, StandardScaler 
from sklearn import metrics 
from sklearn.svm import SVC 
from xgboost import XGBRegressor 
from sklearn.linear_model import LinearRegression, Lasso, Ridge 
from sklearn.ensemble import RandomForestRegressor 
  
import warnings 
# Suppress every warning for the rest of the session.
# NOTE(review): this also hides deprecation warnings raised by the libraries
# used below; consider narrowing the filter to specific categories.
warnings.filterwarnings('ignore')

In [None]:
# Load the ride-request data and take a first look at it.
df = pd.read_csv('ola.csv')

# Only the last expression of a cell is rendered, so the earlier looks at the
# frame are printed explicitly instead of being silently discarded.
print(df.head())
print(df.shape)
df.info()  # prints its own summary
df.describe().T


In [None]:
# Cut the raw 'datetime' string at its spaces: piece 0 is kept as the date
# text, and the first two characters of piece 1 become the integer 'time'.
parts = df["datetime"].str.split(" ", n=2, expand=True)
df = df.assign(
    date=parts[0],
    time=parts[1].str.slice(0, 2).astype('int'),
)
df.head()


In [None]:
# Break the date text at '-' and store the first three pieces as integers.
# NOTE(review): this assumes dates are written day-month-year — confirm
# against ola.csv.
parts = df["date"].str.split("-", n=3, expand=True)
df = df.assign(**{
    name: parts[position].astype('int')
    for position, name in enumerate(("day", "month", "year"))
})
df.head()


In [None]:
from datetime import datetime 
import calendar 
  
  
def weekend_or_weekday(year, month, day):
    """Flag a calendar date as a working day (1) or a weekend day (0).

    Args:
        year, month, day: integers accepted by ``datetime(year, month, day)``.

    Returns:
        0 for Saturday or Sunday, 1 for Monday through Friday.

    Raises:
        ValueError: if the three values do not form a valid date.
    """
    # datetime.weekday() counts Monday as 0, so 5 and 6 are the weekend.
    is_weekend = datetime(year, month, day).weekday() > 4
    return 0 if is_weekend else 1
  
  
# One flag per row, built from the already-parsed year / month / day columns.
df['weekday'] = [
    weekend_or_weekday(year, month, day)
    for year, month, day in zip(df['year'], df['month'], df['day'])
]
df.head()

In [None]:
def am_or_pm(x):
    """Return 1 when the hour ``x`` is greater than 11, otherwise 0."""
    return 1 if x > 11 else 0
  
  
# Tag each row by mapping its 'time' value through am_or_pm.
df['am_or_pm'] = df['time'].map(am_or_pm)
df.head()

In [None]:
from datetime import date 
import holidays 
  
  
# Built once and reused: is_holiday runs for every value of df['date'], so
# constructing the calendar inside the function repeated the work per row.
_INDIA_HOLIDAYS = holidays.country_holidays('IN')


def is_holiday(x):
    """Return 1 if the 'IN' holiday calendar has an entry for ``x``, else 0.

    Args:
        x: the date to look up; the notebook passes the raw strings stored
            in df['date'].

    NOTE(review): how the holidays package parses a date string (day-first
    versus month-first) should be confirmed against the file's date format.
    """
    return 1 if _INDIA_HOLIDAYS.get(x) else 0
  
  
# Look up every date string and store the 0/1 holiday flag.
df['holidays'] = df['date'].map(is_holiday)
df.head()

In [None]:
# The raw date strings have been split into separate columns, so drop them,
# then count the missing values left in each remaining column.
df = df.drop(columns=['datetime', 'date'])
df.isna().sum()


In [None]:
features = ['day', 'time', 'month']

# Start from an empty figure: plt.subplots() would also create one full-size
# Axes that stays visible underneath the grid on recent matplotlib versions.
fig = plt.figure(figsize=(15, 10))
for i, col in enumerate(features):
    ax = fig.add_subplot(2, 2, i + 1)
    # Select 'count' before aggregating so only that column is averaged.
    df.groupby(col)['count'].mean().plot(ax=ax, title=f'Mean count by {col}')
plt.show()

In [None]:
# No backslash is needed inside brackets; the original '\' followed by a
# space is a syntax error.
features = ['season', 'weather', 'holidays',
            'am_or_pm', 'year', 'weekday']

# Start from an empty figure: plt.subplots() would also create one full-size
# Axes that stays visible underneath the grid on recent matplotlib versions.
fig = plt.figure(figsize=(20, 10))
for i, col in enumerate(features):
    ax = fig.add_subplot(2, 3, i + 1)
    # Select 'count' before aggregating so only that column is averaged.
    df.groupby(col)['count'].mean().plot.bar(ax=ax, title=f'Mean count by {col}')
plt.show()

In [None]:
features = ['temp', 'windspeed']

# Start from an empty figure so no placeholder Axes sits behind the panels.
fig = plt.figure(figsize=(15, 5))
for i, col in enumerate(features):
    ax = fig.add_subplot(1, 2, i + 1)
    # histplot with a KDE overlay on a density scale replaces the deprecated
    # distplot while drawing the same kind of picture.
    sb.histplot(df[col], kde=True, stat='density', ax=ax)
plt.show()

In [None]:
features = ['temp', 'windspeed']

# Start from an empty figure so no placeholder Axes sits behind the panels.
fig = plt.figure(figsize=(15, 5))
for i, col in enumerate(features):
    ax = fig.add_subplot(1, 2, i + 1)
    # Pass the column by keyword: the meaning of the first positional
    # argument of boxplot differs between seaborn versions.
    sb.boxplot(x=df[col], ax=ax)
plt.show()

In [None]:
# Rows that would not survive the 'windspeed < 32' filter.
keeps_row = df['windspeed'] < 32
num_rows = len(df) - int(keeps_row.sum())
print(f'Number of rows that will be lost if we remove outliers is equal to {num_rows}.')


In [None]:
features = ['humidity', 'casual', 'registered', 'count']

# Start from an empty figure so no placeholder Axes sits behind the panels.
fig = plt.figure(figsize=(15, 10))
for i, col in enumerate(features):
    ax = fig.add_subplot(2, 2, i + 1)
    # Pass the column by keyword: the meaning of the first positional
    # argument of boxplot differs between seaborn versions.
    sb.boxplot(x=df[col], ax=ax)
plt.show()

In [None]:
# True wherever two columns correlate above 0.8; drawn as an annotated grid
# without a colour bar.
high_corr = df.corr() > 0.8
sb.heatmap(high_corr, annot=True, cbar=False)
plt.show()

In [None]:
# Drop 'registered' and 'time'. errors='ignore' keeps this cell re-runnable:
# the in-place drop raised KeyError once the columns were already gone.
# NOTE(review): 'casual' stays in the feature set; if 'count' is derived from
# it, that leaks the target into the model — confirm.
df = df.drop(columns=['registered', 'time'], errors='ignore')

# Keep rows below the windspeed cut-off of 32 and with non-zero humidity.
df = df[(df['windspeed'] < 32) & (df['humidity'] > 0)]


In [None]:
# Everything except 'count' is a predictor; 'count' itself is the target.
features = df.drop(columns=['count'])
target = df['count'].to_numpy()

# Hold out 10% of the rows for validation, with a fixed seed.
X_train, X_val, Y_train, Y_val = train_test_split(
    features,
    target,
    random_state=22,
    test_size=0.1,
)
X_train.shape, X_val.shape

In [None]:
# Standardise the predictors; the scaler is fitted on the training split only
# and then applied unchanged to the validation split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
from sklearn.metrics import mean_absolute_error as mae
# The random forest is seeded so its scores are reproducible across runs.
models = [LinearRegression(), XGBRegressor(), Lasso(),
          RandomForestRegressor(random_state=22), Ridge()]

# Iterate over the list itself so adding or removing a model needs no other edit.
for regressor in models:
    regressor.fit(X_train, Y_train)

    print(f'{regressor} : ')

    train_preds = regressor.predict(X_train)
    print('Training Error : ', mae(Y_train, train_preds))

    val_preds = regressor.predict(X_val)
    print('Validation Error : ', mae(Y_val, val_preds))
    print()

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, classification_report, precision_score
import matplotlib.pyplot as plt
import seaborn as sns

# Load the dataset
# Read the request records from a CSV file in the working directory.
# NOTE(review): later cells read the 'requests' and 'weather' columns —
# confirm the file provides them.
data = pd.read_csv('rapido_bike_requests.csv')

# Data Cleaning and Preprocessing
def preprocess_data(df):
    """Clean the request data and derive the model columns.

    Steps, in order:
      1. drop every row that contains a missing value;
      2. bin 'requests' into five ordered classes in 'requests_binned';
      3. one-hot encode 'weather', dropping its first level;
      4. if a 'date' column exists, replace it with its month number in 'month'.

    Args:
        df: DataFrame with at least 'requests' and 'weather' columns.

    Returns:
        A new DataFrame; the frame passed in is not modified.
    """
    # Handle missing values
    df = df.dropna()  # Drop rows with NaN values

    # Convert 'requests' to categorical by binning
    bins = [0, 50, 100, 150, 200, np.inf]
    labels = ['Very Low', 'Low', 'Medium', 'High', 'Very High']
    # include_lowest=True closes the first bin on the left; without it a row
    # with exactly 0 requests falls outside every bin and gets a NaN label.
    df = df.assign(
        requests_binned=pd.cut(df['requests'], bins=bins, labels=labels,
                               include_lowest=True)
    )

    # Convert 'weather' to dummy variables
    df = pd.get_dummies(df, columns=['weather'], drop_first=True)

    # Add 'month' column if 'date' is present and not already processed
    if 'date' in df.columns:
        df = df.assign(month=pd.to_datetime(df['date']).dt.month).drop(columns=['date'])

    return df

data = preprocess_data(data)

# Predictors are every column except the raw and binned request counts;
# the binned count is the class to predict.
X = data.drop(columns=['requests', 'requests_binned'])
y = data['requests_binned']

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

# Fit a 100-tree random forest on the training split (fit returns the estimator).
model = RandomForestClassifier(random_state=42, n_estimators=100).fit(X_train, y_train)

# Predicted class for every held-out row.
y_pred = model.predict(X_test)

# Evaluate the model
# Class names in bin order, read back from the categorical target: the list
# named 'labels' inside preprocess_data is local to that function and is not
# visible here.
labels = list(y.cat.categories)

# Pass labels= explicitly. Otherwise scikit-learn orders the classes
# alphabetically, and the names below would be attached to the wrong rows
# and columns.
cm = confusion_matrix(y_test, y_pred, labels=labels)
report = classification_report(y_test, y_pred, labels=labels, target_names=labels)
precision = precision_score(y_test, y_pred, average='macro')

print(f'Confusion Matrix:\n{cm}')
print(f'Classification Report:\n{report}')
print(f'Precision Score: {precision}')

# Plot confusion matrix; tick labels follow the same order as labels= above.
plt.figure(figsize=(10, 7))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=labels, yticklabels=labels)
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Confusion Matrix')
plt.show()

# Monthly wise plot
# Total requests per month, with 'month' kept as an ordinary column.
monthly_data = data.groupby('month', as_index=False)['requests'].sum()

fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(monthly_data['month'], monthly_data['requests'], marker='o')
ax.set_xlabel('Month')
ax.set_ylabel('Total Requests')
ax.set_title('Monthly Total Bike Requests')
ax.grid(True)
ax.set_xticks(monthly_data['month'])
plt.show()

# Function to forecast requests for a specific hour (classification approach)
def forecast_requests(hour, day_of_week, weather, temperature, model, columns,
                      **extra_features):
    """Predict the request bin for a single set of conditions.

    Builds a one-row frame with exactly the columns in ``columns``, in that
    order, and passes it to ``model.predict``.

    Args:
        hour, day_of_week, temperature: values for the columns of the same name.
        weather: weather category. The dummy column ``weather_<weather>`` is
            set to 1 and every other ``weather_*`` column to 0. A category with
            no dummy column (such as the level dropped by ``drop_first``)
            leaves them all 0.
        model: fitted estimator exposing ``predict``.
        columns: feature names the model was trained on, in training order.
        **extra_features: values for any further training columns,
            e.g. ``month=7``.

    Returns:
        The first element of ``model.predict`` for the constructed row.

    Raises:
        ValueError: if ``columns`` has a non-weather feature with no value supplied.

    Supplied values whose name is not in ``columns`` are ignored.
    """
    feature_names = list(columns)
    supplied = {
        'hour': hour,
        'day_of_week': day_of_week,
        'temperature': temperature,
        **extra_features,
    }
    # The original prefixed 'weather_' onto names that already carried it and
    # compared a column name with the raw weather value, so no dummy was ever set.
    active_dummy = f'weather_{weather}'

    row = {}
    missing = []
    for name in feature_names:
        if str(name).startswith('weather_'):
            row[name] = 1 if name == active_dummy else 0
        elif name in supplied:
            row[name] = supplied[name]
        else:
            missing.append(name)
    if missing:
        raise ValueError(f'No value supplied for model feature(s): {missing}')

    # Create a single-row dataframe whose columns match the training layout
    input_data = pd.DataFrame([row], columns=feature_names)
    return model.predict(input_data)[0]

# Example of forecasting
# Conditions for the sample forecast.
hour, day_of_week, weather, temperature = 15, 3, 2, 30
columns = X.columns
predicted_requests_bin = forecast_requests(
    hour=hour,
    day_of_week=day_of_week,
    weather=weather,
    temperature=temperature,
    model=model,
    columns=columns,
)
print(f'Predicted Requests Bin at {hour}:00 on day {day_of_week} with weather {weather} and temperature {temperature}°C: {predicted_requests_bin}')
