### Importing Libraries

In [None]:
#Data Processing
import pandas as pd
import numpy as np

#Data Visualisation
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import plotly.express as px

import os

# Listing the required files

In [None]:
# Collect the names of the raw Uber CSV files in the Kaggle input directory.
# os.listdir returns entries in arbitrary order, so the names are sorted to
# keep the order in which the files are later read reproducible between runs.
files = sorted(
    filename
    for filename in os.listdir(r'../input/uber-pickups-in-new-york-city')
    if filename.startswith("uber-")
)
files

In [None]:
# Leave the Jan-Jun 2015 file out of the list of files to load.
# NOTE(review): presumably excluded because its columns differ from the 2014
# files -- confirm against the dataset.
# The list is rebuilt instead of calling files.remove(...) so that this cell
# can be re-run safely: remove() raises ValueError once the name is gone.
files = [name for name in files if name != 'uber-raw-data-janjune-15.csv']

# Concatenate the data

In [None]:
path=r'../input/uber-pickups-in-new-york-city'

# Read each file once and concatenate them in a single call. Growing a
# DataFrame with pd.concat inside the loop re-copies all previously loaded
# rows on every iteration, and concatenating onto an empty frame triggers a
# FutureWarning on recent pandas versions.
frames=[pd.read_csv(os.path.join(path,file),encoding='utf-8') for file in files]

# ignore_index=True gives the combined frame one unique 0..n-1 index instead
# of repeating each file's own row numbers. An empty file list still yields an
# empty DataFrame, as the original loop did.
Data=pd.concat(frames,ignore_index=True) if frames else pd.DataFrame()

In [None]:
# Preview a handful of random rows. A small fixed-size sample with a seed is
# enough for a visual check and shows the same rows on every run, whereas
# sampling half of the dataset only to display it is slow and not repeatable.
Data.sample(n=min(10,len(Data)),random_state=42)

# Checking data attributes

In [None]:
# (rows, columns) of the concatenated frame.
Data.shape

In [None]:
# Work on a copy so the concatenated frame in `Data` is left as loaded.
data=Data.copy()

In [None]:
# Column dtypes of the working copy, before any preprocessing.
data.dtypes

# Data Preprocessing

In [None]:
# Parse the pickup timestamp column into datetimes using an explicit
# month/day/year hour:minute:second format.
data['Date/Time']=pd.to_datetime(data['Date/Time'], format='%m/%d/%Y %H:%M:%S')

In [None]:
# Re-check the dtypes after converting 'Date/Time' with pd.to_datetime.
data.dtypes

In [None]:
# Split the pickup timestamp into the calendar parts used by the analyses
# below. The .dt accessor is looked up once and reused for every part.
pickup_dt=data['Date/Time'].dt

data['month']=pickup_dt.month
data['weekday']=pickup_dt.day_name()
data['day']=pickup_dt.day
data['hour']=pickup_dt.hour
data['minute']=pickup_dt.minute

In [None]:
# Preview the first rows, including the derived date/time columns.
data.head()

In [None]:
# Dtypes after adding the month/weekday/day/hour/minute columns.
data.dtypes

In [None]:
#00. Geospatial Analysis to understand the cab demand

In [None]:
# Number of pickups recorded at each distinct (Lat, Lon) pair. The timestamp
# column is selected before counting so only that one column is aggregated.
trips_per_point=data.groupby(['Lat','Lon'])['Date/Time'].count()

Demand=trips_per_point.reset_index().rename(
    columns={
        'Lat':'Latitude',
        'Lon':'Longitude',
        'Date/Time':'Number of Trips',
    }
)

In [None]:
import folium as fo
from folium.plugins import HeatMap

In [None]:
# Base map for the heat layer. It is centred on the mean pickup coordinate
# with a city-level zoom so the map opens on the data instead of on folium's
# default view, which would require manual panning and zooming to find it.
base=fo.Map(
    location=[float(data['Lat'].mean()),float(data['Lon'].mean())],
    zoom_start=10,
)

In [None]:
# Add a heat layer built from the per-coordinate trip counts in `Demand`
# (latitude, longitude, number of trips) to the base map, then display the map.
# NOTE(review): `zoom` is not a documented HeatMap argument (the documented
# one is `max_zoom`) -- confirm what was intended here.
HeatMap(Demand,zoom=20,radius=15).add_to(base)
base

In [None]:
#01. Which month and day of the month sees the highest number of uber trips?

In [None]:
# Trips per (day of month, month) combination, as a flat three-column frame.
day_month_counts=data[['day','month']].value_counts()
weekday=day_month_counts.rename_axis(['Day','Month']).reset_index(name='Count')

# Ordered categoricals: days 1..31 and the months April (4) to September (9).
day_levels=list(range(1,32))
month_levels=[4,5,6,7,8,9]
weekday['Day']=pd.Categorical(weekday['Day'],categories=day_levels,ordered=True)
weekday['Month']=pd.Categorical(weekday['Month'],categories=month_levels,ordered=True)

In [None]:
# Bar chart of trips per day of the month, with one colour per month.
month_order=[4,5,6,7,8,9]
month_palette=['#2C2C3E','#2E5467','#1E7F84','#33AC8D','#78D584','#D1FA74']

fig1=px.bar(
    weekday,
    x='Day',
    y='Count',
    color='Month',
    text='Count',
    labels={'Count':'Number of Trips','Day':'Day of the Month'},
    category_orders={"Month": month_order},
    color_discrete_sequence=month_palette,
    template='plotly_dark',
    width=1100,
    height=500,
)

# Abbreviated count labels placed above the bars.
fig1.update_traces(texttemplate='%{text:.2s}', textposition='outside')

# Label-size handling and the centred title, set in one layout update.
fig1.update_layout(
    uniformtext_minsize=8,
    uniformtext_mode='hide',
    title_text='Uber trip by Months and Days',
    title_x=0.5,
)
fig1.show()

In [None]:
#September'14 sees the highest number of trips

In [None]:
#02. Which is the busiest hour in the day for uber cabs?

In [None]:
# Total trips for each hour of the day, ordered by hour.
hour_counts=data['hour'].value_counts()
hour=(
    hour_counts
    .rename_axis('Hour')
    .reset_index(name='Count')
    .sort_values(by='Hour')
)

In [None]:
# Bar chart of the total number of trips for each hour of the day.
fig2=px.bar(hour,
      x='Hour',
      y='Count',
      template='plotly_dark',
      labels={'Count':'Number of Trips'},
      height=400,
      width=1100,
      text='Count'
      )
# Abbreviate the bar labels to two significant digits, as fig1 does; raw
# six-digit counts do not fit above 24 narrow bars.
fig2.update_traces(texttemplate='%{text:.2s}', textposition='outside')
# Centred title, so the figure can be read on its own like the other charts.
fig2.update_layout(title_text='Uber Rides by hour',title_x=0.5)
fig2.show()

In [None]:
#The maximum number of rides is taken between 4 PM and 7 PM

In [None]:
# Hourly trip distribution, one panel per month.

# The seaborn style sheets were renamed to "seaborn-v0_8-*" in Matplotlib 3.6
# and the old names were later removed. Use whichever name this installation
# provides, falling back to the default style.
style_name=next(
    (name for name in ("seaborn-v0_8-dark-palette","seaborn-dark-palette")
     if name in plt.style.available),
    "default",
)
plt.style.use(style_name)

# Sort the months so the panels appear in calendar order regardless of the
# order in which the files were loaded.
months=sorted(data['month'].unique())

# Size the grid from the number of months instead of assuming exactly six.
n_cols=2
n_rows=max(1,-(-len(months)//n_cols))
fig,axes=plt.subplots(n_rows,n_cols,figsize=(20,20),squeeze=False)

# One bin per hour (0..23). The default of 10 bins would merge the 24
# discrete hours into bins of unequal hour counts.
hour_bins=list(range(25))

for ax,month in zip(axes.flat,months):
    data.loc[data['month']==month,'hour'].hist(ax=ax,bins=hour_bins,color='#636EFA')
    ax.set(title=f'Month {month}',xlabel='Hour of the Day',ylabel='Number of Trips')

# Hide any unused panels when the month count does not fill the grid.
for ax in axes.flat[len(months):]:
    ax.set_visible(False)

fig.tight_layout()
plt.show()

In [None]:
#Maximum number of rides were taken in September

In [None]:
#03. Distribution of Trips By Days in a Month

In [None]:
# Total trips for each day of the month, ordered by day.
day_counts=data['day'].value_counts()
trips_by_days=(
    day_counts
    .rename_axis('Days')
    .reset_index(name='Number of Trips')
    .sort_values(by='Days')
)

In [None]:
# Histogram of the pre-aggregated trips per day of the month.
fig3=px.histogram(
    trips_by_days,
    x='Days',
    y='Number of Trips',
    nbins=31,
    color_discrete_sequence=['#C78845'],
    template='plotly_dark',
    width=1100,
    height=400,
)

# Gap between bars and the centred title, set in one layout update.
fig3.update_layout(
    bargap=0.2,
    title_text='Distribution of trips by days in a Month',
    title_x=0.5,
)
fig3.show()


In [None]:
#04. Base locations with highest number of pickups

In [None]:
# Trips per (base, hour of day) combination, most frequent first.
base_hour_counts=data[['Base','hour']].value_counts()
trips_by_loc=(
    base_hour_counts
    .rename_axis(['Base','Hour'])
    .reset_index(name='Number of Trips')
)
trips_by_loc

In [None]:
# Scatter of trips per hour of the day, with one colour per base.
base_palette=['#50F9F1','#6AE5A8','#96C96A','#B7AA47','#C78845']

fig4=px.scatter(
    trips_by_loc,
    x='Hour',
    y='Number of Trips',
    color='Base',
    color_discrete_sequence=base_palette,
    template='plotly_dark',
)
fig4.update_layout(
    title_text='Trips by location and time of the day',
    title_x=0.5,
)
fig4.show()

In [None]:
#05. Cross Analysis between hours and weekdays

In [None]:
# Trips per (weekday, hour) pair, as a Series indexed by both keys. The
# timestamp column is selected before counting so only it is aggregated.
trips_by_weekday_hour=data.groupby(['weekday','hour'])['Date/Time']
hour_week=trips_by_weekday_hour.count()
hour_week

In [None]:
# Weekday x hour table: one row per weekday, one column per hour.
# groupby sorts the weekday names alphabetically (Friday, Monday, ...), so the
# rows are reindexed into calendar order to make the heatmap read Mon -> Sun.
weekday_order=['Monday','Tuesday','Wednesday','Thursday','Friday','Saturday','Sunday']
pivot=hour_week.unstack().reindex(weekday_order)
pivot

In [None]:
# Heatmap of trip counts by weekday (rows) and hour of the day (columns).
# The theme is set before the figure is created so it applies on the first
# run of this cell as well as on re-runs.
sns.set_theme(style='darkgrid')
plt.figure(figsize=(10,10))

# Despite its name, fig5 holds the Axes object returned by sns.heatmap.
fig5=sns.heatmap(pivot,cmap='Blues_r')

# Title and axis labels, so the figure can be read on its own like the others.
fig5.set(
    title='Uber trips by weekday and hour',
    xlabel='Hour of the Day',
    ylabel='Day of the Week',
);