**VISUALIZATION**

In [10]:
import pandas as pd
import plotly.express as px

# Load the dataset
df = pd.read_csv('Trips_by_Distance.csv')

# Convert the Date column to datetime format
df['Date'] = pd.to_datetime(df['Date'])

# Filter data for the year 2022
df_2022 = df[df['Date'].dt.year == 2022]

# Aggregate the number of trips by state
state_trip_data = df_2022.groupby('State Postal Code').agg({
    'Number of Trips': 'sum'
}).reset_index()

# Display the aggregated data
state_trip_data.head()


Unnamed: 0,State Postal Code,Number of Trips
0,AK,1924401000.0
1,AL,13331750000.0
2,AR,8080622000.0
3,AZ,18941870000.0
4,CA,94711240000.0


In [11]:
# Create the bar plot
fig = px.bar(
    state_trip_data,
    x='State Postal Code',
    y='Number of Trips',
    title='Number of Trips by State in 2022',
    labels={'State Postal Code': 'State', 'Number of Trips': 'Number of Trips'},
    template='plotly_white'
)

# Display the plot
fig.show()


In [12]:
# Create the choropleth map
fig = px.choropleth(
    state_trip_data,
    locations='State Postal Code',
    locationmode='USA-states',
    color='Number of Trips',
    color_continuous_scale='Blues',
    scope='usa',
    title='Number of Trips by State in 2022',
    labels={'State Postal Code': 'State', 'Number of Trips': 'Number of Trips'}
)

# Display the map
fig.show()


In [13]:
# Filter data for the years 2019 to 2022
df_filtered = df[(df['Date'].dt.year >= 2019) & (df['Date'].dt.year <= 2022)]

# Extract year from Date
df_filtered['Year'] = df_filtered['Date'].dt.year

# Aggregate the number of trips by state and year
state_year_data = df_filtered.groupby(['State Postal Code', 'Year']).agg({
    'Number of Trips': 'sum'
}).reset_index()

# Create the plot
fig = px.bar(
    state_year_data,
    x='State Postal Code',
    y='Number of Trips',
    color='Year',
    barmode='group',
    title='Number of Trips by State (2019-2022)',
    labels={'State Postal Code': 'State', 'Number of Trips': 'Number of Trips'}
)

# Display the plot
fig.show()




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [15]:
# Filter data for Illinois state and the year 2022
df_il_2022 = df[(df['State Postal Code'] == 'IL') & (df['Date'].dt.year == 2022)]

# Aggregate the number of trips by county and distance categories
county_trip_data = df_il_2022.groupby('County Name').agg({
    'Number of Trips <1': 'sum',
    'Number of Trips 1-3': 'sum',
    'Number of Trips 3-5': 'sum',
    'Number of Trips 5-10': 'sum',
    'Number of Trips 10-25': 'sum',
    'Number of Trips 25-50': 'sum',
    'Number of Trips 50-100': 'sum',
    'Number of Trips 100-250': 'sum',
    'Number of Trips 250-500': 'sum',
    'Number of Trips >=500': 'sum'
}).reset_index()

# Melt the dataframe for easier plotting
melted_data = county_trip_data.melt(id_vars=['County Name'], 
                                    var_name='Trip Distance', 
                                    value_name='Number of Trips')

# Create the bar plot
fig = px.bar(
    melted_data,
    x='County Name',
    y='Number of Trips',
    color='Trip Distance',
    title='Number of Trips with Different Distances by County in Illinois (2022)',
    labels={'County Name': 'County', 'Number of Trips': 'Number of Trips'},
    barmode='stack'
)

# Display the plot
fig.show()