# Yulu Case Study Analysis
This notebook contains an analysis of Yulu bike rental data.

## Data Loading and Preprocessing

In [None]:

import pandas as pd

# Load the dataset (parsed with read_csv's default comma separator)
df = pd.read_csv('bike_sharing.txt')

# Data overview: dtypes / non-null counts, then the per-column null totals.
# A notebook only auto-displays the LAST expression of a cell, so the null
# counts are printed explicitly; as a bare mid-cell expression they were
# computed and silently discarded.
df.info()
print(df.isnull().sum())

# Parse the timestamp column into pandas datetimes.
# NOTE(review): this assumes the file has a 'date' column; the commonly
# distributed Yulu dataset names it 'datetime' -- confirm against the header.
df['date'] = pd.to_datetime(df['date'])

# Shape and datatypes (reported before the categorical conversion below,
# so the coded columns still show their raw dtype here)
print(df.shape)
print(df.dtypes)

# Convert categorical attributes: one loop instead of one statement per column
for col in ('season', 'holiday', 'workingday', 'weather'):
    df[col] = df[col].astype('category')


## Univariate and Bivariate Analysis

In [None]:

import seaborn as sns
import matplotlib.pyplot as plt

# Histogram for Humidity
sns.histplot(data=df, x='humidity', bins=20, kde=True, color='blue')
plt.title('Humidity Distribution')
plt.show()

# Temperature Distribution
# displot is figure-level (it opens its own figure); plt.title then labels
# the axes of that new figure.
sns.displot(data=df, x='temp', kde=True, bins=20, color='green')
plt.title('Temperature Distribution')
plt.show()

# Countplot for Seasons
# seaborn 0.13 deprecates passing `palette` without `hue`. Mapping hue to the
# same variable as x keeps the one-colour-per-season look; dodge=False keeps
# the bars full width on older seaborn, and the redundant legend (if one was
# drawn) is removed so the figure looks the same on every version.
season_ax = sns.countplot(data=df, x='season', hue='season',
                          palette='pastel', dodge=False)
season_legend = season_ax.get_legend()
if season_legend is not None:
    season_legend.remove()
plt.title('Season Distribution')
plt.show()

# Boxplot for Season vs Count
sns.boxplot(data=df, x='season', y='count')
plt.title('Season vs Bike Rentals')
plt.show()

# Heatmap for Correlation
# Only numeric columns are correlated; the category-typed columns converted
# earlier are excluded by the dtype filter.
corr = df.select_dtypes(include=['number']).corr()
sns.heatmap(corr, annot=True, cmap='coolwarm')
plt.title('Correlation Heatmap')
plt.show()


## Missing Values and Outliers

In [None]:

# Missing values: absolute count and percentage of rows, per column.
# The null mask is built once and reused for both summaries.
null_mask = df.isnull()
missing = null_mask.sum()
missing_percent = null_mask.mean() * 100
print(pd.DataFrame({'Missing Values': missing, 'Percentage': missing_percent}))

# Outlier Detection (1.5 * IQR fences, evaluated column by column)
# Select every numeric dtype -- the same 'number' selection the correlation
# heatmap uses -- rather than only float64/int64, so narrower numeric dtypes
# are not silently skipped. The boolean 'outlier' column added below is not
# numeric, so re-running this cell does not feed the flag back into itself.
numerical = df.select_dtypes(include=['number']).columns
Q1 = df[numerical].quantile(0.25)
Q3 = df[numerical].quantile(0.75)
IQR = Q3 - Q1
lower_fence = Q1 - 1.5 * IQR
upper_fence = Q3 + 1.5 * IQR

# A row is flagged when ANY of its numeric values falls outside the fences.
outliers = ((df[numerical] < lower_fence) | (df[numerical] > upper_fence)).any(axis=1)
df['outlier'] = outliers
print("Rows flagged as outliers:", int(outliers.sum()))


## Statistical Testing

### Working vs Non-working Days: Assumption Checks and Mann-Whitney U Test

In [None]:

from scipy.stats import shapiro, levene, mannwhitneyu

# Split the rental counts by the workingday flag (1 vs 0).
wday = df.loc[df['workingday'] == 1, 'count']
nwday = df.loc[df['workingday'] == 0, 'count']

# Normality check
# NOTE(review): SciPy documents that the Shapiro-Wilk p-value may be
# inaccurate for samples larger than 5000 -- confirm the group sizes.
print("Shapiro Test - Working Day:", shapiro(wday))
print("Shapiro Test - Non-working Day:", shapiro(nwday))

# Homogeneity of variance
print("Levene’s Test:", levene(wday, nwday))

# Mann-Whitney U Test (non-parametric)
# The alternative is stated explicitly: SciPy releases before 1.7 defaulted
# to a one-sided p-value, so leaving it implicit makes the reported p-value
# depend on the installed SciPy version.
stat, pval = mannwhitneyu(wday, nwday, alternative='two-sided')
print("Mann-Whitney U Test:", stat, pval)


### ANOVA Test: Season and Weather

In [None]:

from scipy.stats import f_oneway

def count_anova_by(frame, factor):
    """Run a one-way ANOVA of ``count`` across the levels of ``factor``.

    The samples are taken from the levels actually present in ``frame``
    instead of hard-coded level codes, so a level that is absent from the
    data does not contribute an empty sample to the test.

    Parameters
    ----------
    frame : pandas.DataFrame
        Data containing a ``count`` column and the ``factor`` column.
    factor : str
        Name of the grouping column.

    Returns
    -------
    The result object returned by ``scipy.stats.f_oneway``.
    """
    # observed=True skips declared-but-unused categories of a categorical column.
    samples = [grp for _, grp in frame.groupby(factor, observed=True)['count']]
    return f_oneway(*samples)


# NOTE(review): f_oneway assumes roughly normal groups with equal variances;
# scipy.stats.kruskal is the rank-based alternative if those checks fail.

# Season
season_anova = count_anova_by(df, 'season')
print("Season ANOVA:", season_anova)

# Weather
weather_anova = count_anova_by(df, 'weather')
print("Weather ANOVA:", weather_anova)


### Chi-Square Test: Season vs Weather

In [None]:

from scipy.stats import chi2_contingency

# Cross-tabulate weather (rows) against season (columns) to get the observed
# frequency table the independence test works on.
contingency = pd.crosstab(index=df['weather'], columns=df['season'])

# chi2_contingency yields (statistic, p-value, dof, expected frequencies);
# only the first two are reported here.
test_result = chi2_contingency(contingency)
chi2, pval, _, _ = test_result
print("Chi-Square Test:", chi2, pval)


## Summary and Recommendations


### Key Insights:
- Summer & Fall have the highest rentals, winter the lowest.
- Clear weather boosts rentals; rainy/snowy days reduce demand.
- Working vs non-working days do **not** show significant rental differences.
- Season and Weather are **not independent**.

### Recommendations:
1. Increase bike availability during Summer & Fall.
2. Launch weather-based pricing (discounts in rain).
3. Target marketing during low-demand seasons.
4. Use weather forecasts for real-time deployment planning.
5. Focus on events, not weekdays/weekends, for demand spikes.
