# Python Libraries

In [None]:
!pip install plotly



In [None]:
import pandas as pd
import folium as fl
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
from plotly.subplots import make_subplots
# Make "plotly_white" the default template for plotly figures created after this point.
pio.templates.default="plotly_white"

In [None]:
# Load the Delhi Metro station dataset (path is relative to the working
# directory) and preview its first five rows.
metro_data = pd.read_csv("DelhiMetroNetwork.csv")
metro_data.head(5)

Unnamed: 0,Station ID,Station Name,Distance from Start (km),Line,Opening Date,Station Layout,Latitude,Longitude
0,1,Jhil Mil,10.3,Red line,2008-04-06,Elevated,28.67579,77.31239
1,2,Welcome [Conn: Red],46.8,Pink line,2018-10-31,Elevated,28.6718,77.27756
2,3,DLF Phase 3,10.0,Rapid Metro,2013-11-14,Elevated,28.4936,77.0935
3,4,Okhla NSIC,23.8,Magenta line,2017-12-25,Elevated,28.554483,77.264849
4,5,Dwarka Mor,10.2,Blue line,2005-12-30,Elevated,28.61932,77.03326


In [None]:
# Summary statistics for the numeric columns.
# NOTE(review): the recorded output shows a minimum Longitude of about 28.70 while
# the quartiles sit near 77.1-77.3, which suggests at least one row with swapped
# or mis-entered coordinates -- verify against the raw data.
metro_data.describe()

Unnamed: 0,Station ID,Distance from Start (km),Latitude,Longitude
count,285.0,285.0,285.0,285.0
mean,143.0,19.218947,28.595428,77.029315
std,82.416625,14.002862,0.091316,2.8754
min,1.0,0.0,27.920862,28.698807
25%,72.0,7.3,28.545828,77.10713
50%,143.0,17.4,28.613453,77.20722
75%,214.0,28.8,28.66636,77.281165
max,285.0,52.7,28.878965,77.554479


# Checking for missing values

In [None]:
# Count the missing entries in each column (isna is the same check as isnull).
missing_values = metro_data.isna().sum()
missing_values

Station ID                  0
Station Name                0
Distance from Start (km)    0
Line                        0
Opening Date                0
Station Layout              0
Latitude                    0
Longitude                   0
dtype: int64

In [None]:
# Inspect the dtype of every column; the recorded output shows 'Opening Date'
# stored as object rather than as a datetime type.
dtype=metro_data.dtypes
dtype

Station ID                    int64
Station Name                 object
Distance from Start (km)    float64
Line                         object
Opening Date                 object
Station Layout               object
Latitude                    float64
Longitude                   float64
dtype: object

# Geospatial Analysis

In [None]:
# Marker colour used for each metro line. The keys must match the values of the
# 'Line' column exactly (including the dataset's 'Voilet line' spelling).
# NOTE(review): 'beige' for the Yellow line and 'black' for the Aqua line are
# presumably stand-ins chosen from the colours folium's Icon supports -- confirm.
line_colors = {
    'Red line': 'red',
    'Blue line': 'blue',
    'Yellow line': 'beige',
    'Green line': 'green',
    'Voilet line': 'purple',
    'Pink line': 'pink',
    'Magenta line': 'darkred',
    'Orange line': 'orange',
    'Rapid Metro': 'cadetblue',
    'Aqua line': 'black',
    'Green line branch': 'lightgreen',
    'Blue line branch': 'lightblue',
    'Gray line': 'lightgray',
}

# Base map the station markers are added to.
delhi_map_line_tooltip = fl.Map(location=[28.7041, 77.1025], zoom_start=11)

# One marker per station: popup shows the station name, tooltip shows the
# station name and its line, and the icon colour encodes the line.
station_fields = zip(
    metro_data['Station Name'],
    metro_data['Line'],
    metro_data['Latitude'],
    metro_data['Longitude'],
)
for station_name, line_name, latitude, longitude in station_fields:
    # Lines missing from line_colors fall back to a black icon.
    station_icon = fl.Icon(color=line_colors.get(line_name, 'black'))
    station_marker = fl.Marker(
        location=[latitude, longitude],
        popup=f"{station_name}",
        tooltip=f"{station_name}, {line_name}",
        icon=station_icon,
    )
    station_marker.add_to(delhi_map_line_tooltip)

delhi_map_line_tooltip

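Each marker represents a metro station. The marker colour indicates the metro line, hovering over a marker shows the station name and its line, and clicking it opens a popup with the station name.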

# Temporal Analysis

In [None]:
# Parse the opening dates and derive the calendar year each station opened.
# NOTE(review): 'Opening Data' looks like a typo of 'Opening Date'; the column
# name is kept unchanged here in case other cells rely on it.
metro_data['Opening Data'] = pd.to_datetime(metro_data['Opening Date'])
metro_data['Opening Year'] = metro_data['Opening Data'].dt.year

# Number of stations opened in each year, in chronological order.
stations_per_year = metro_data['Opening Year'].value_counts().sort_index()

# Flatten to a two-column frame for plotting.
stations_df = stations_per_year.reset_index()
stations_df.columns = ['Year', 'Number of Stations']

fig = px.bar(
    stations_df,
    x='Year',
    y='Number of Stations',
    title="Number of Metro Stations Opened Each Year In Delhi",
    labels={'Year': 'Year', 'Number of Stations': 'Number of Stations Opened'},
)
# Linear tick mode with labels rotated by -45 degrees on the year axis.
fig.update_xaxes(tickangle=-45, tickmode='linear', title_text="Year")
fig.update_yaxes(title_text='Number of Stations Opened')
fig.show()

# Line Analysis

In [None]:
# Number of stations on each line (index = line name, sorted by count).
stations_per_line = metro_data['Line'].value_counts()

# Approximate each line's total length by the largest "distance from start"
# recorded on that line (index = line name, sorted alphabetically).
total_distance_perline = metro_data.groupby('Line')['Distance from Start (km)'].max()

# n stations are separated by n - 1 gaps; pandas aligns the two Series on the
# line name, so each line is divided by its own station count.
avg_distance_perline = total_distance_perline / (stations_per_line - 1)

# Build the table from index-aligned Series only. Mixing positional arrays
# (stations_per_line.index / .values, in count order) with the alphabetically
# indexed average Series pairs each line with another line's average.
line_analysis = (
    pd.DataFrame({
        'Number of Stations': stations_per_line,
        'Average Distance Between Stations (km)': avg_distance_perline,
    })
    .rename_axis('Line')
    .reset_index()
    # Busiest lines (by number of stations) first.
    .sort_values(by='Number of Stations', ascending=False)
    .reset_index(drop=True)
)
line_analysis

                 Line  Number of Stations  \
0           Blue line                  49   
1           Pink line                  38   
2         Yellow line                  37   
3         Voilet line                  34   
4            Red line                  29   
5        Magenta line                  25   
6           Aqua line                  21   
7          Green line                  21   
8         Rapid Metro                  11   
9    Blue line branch                   8   
10        Orange line                   6   
11          Gray line                   3   
12  Green line branch                   3   

    Average Distance Between Stations (km)  
0                                 1.355000  
1                                 1.097917  
2                                 1.157143  
3                                 1.950000  
4                                 1.240000  
5                                 1.050000  
6                                 1.379167  
7        

# Creating Subplots

In [None]:
# Two side-by-side horizontal bar charts built from line_analysis:
# station counts on the left, average inter-station distance on the right.
fig = make_subplots(
    rows=1,
    cols=2,
    subplot_titles=('Number of Stations Per Metro Line',
                    'Average Distance Between Stations Per Metro Line'),
    horizontal_spacing=0.2,
)

# Left panel: number of stations per line.
fig.add_trace(
    go.Bar(y=line_analysis['Line'], x=line_analysis['Number of Stations'],
           orientation='h', name='Number of Stations', marker_color='crimson'),
    row=1, col=1,
)

# Right panel: average distance between consecutive stations per line.
fig.add_trace(
    go.Bar(y=line_analysis['Line'], x=line_analysis['Average Distance Between Stations (km)'],
           orientation='h', name='Average Distance (km)', marker_color='navy'),
    row=1, col=2,
)

# x-axis titles
fig.update_xaxes(title_text="Number Of Stations", row=1, col=1)
fig.update_xaxes(title_text="Average Distance Between Stations (km)", row=1, col=2)

# y-axis titles: these must go through update_yaxes; calling update_xaxes here
# would overwrite the x-axis titles set just above.
fig.update_yaxes(title_text="Metro Line", row=1, col=1)
fig.update_yaxes(title_text="", row=1, col=2)

# Overall figure size, title and template.
fig.update_layout(height=600, width=1200, title_text="Metro Line Analysis", template="plotly_white")

fig.show()

# Station Layout Analysis

In [None]:
# Number of stations for each station layout type.
layout_counts = metro_data['Station Layout'].value_counts()

# Bar chart with one colour per layout. The colour variable is categorical, so
# the palette is supplied through color_discrete_sequence; a continuous colour
# scale does not apply to it.
fig = px.bar(
    x=layout_counts.index,
    y=layout_counts.values,
    labels={'x': 'Station Layout', 'y': 'Number of Stations'},
    title='Distribution of Delhi Metro Station Layouts',
    color=layout_counts.index,
    color_discrete_sequence=px.colors.qualitative.Pastel,
)

# Updating the layout for better presentation. The legend would only repeat
# the x-axis categories, so it is hidden.
fig.update_layout(
    xaxis_title="Station Layout",
    yaxis_title="Number of Stations",
    showlegend=False,
    template="plotly_white",
)
fig.show()