<a href="https://colab.research.google.com/github/Reben80/Data110-32008--Sp25/blob/main/MC_Tresspassking_Towing_V2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
import plotly.graph_objects as go
import plotly.express as px

# Discrete colour palette reused by later charts: Plotly's qualitative "T10" sequence.
# Other built-in palettes are listed at https://plotly.com/python/discrete-color/
colors = px.colors.qualitative.T10

# Load the towing records. 'Tow Date' is passed to parse_dates so pandas tries to
# read that column as datetimes instead of plain strings.
# NOTE(review): '/content/...' looks like a Colab upload path — the CSV has to be
# present there before this cell is run.
tows = pd.read_csv(
    filepath_or_buffer='/content/tows 10.csv',
    parse_dates=['Tow Date'],
)

In [None]:
# Vehicle data preparation.
# dropna() and assign() each return a new DataFrame, so `tows` itself is never
# modified and no SettingWithCopyWarning is raised.
# NOTE(review): the age is measured against the fixed year 2024 — confirm that this
# is the intended reference year for the data set.
vehicle_data = (
    tows
    .dropna(subset=['Vehicle Year'])
    .assign(**{'Vehicle Year': lambda frame: frame['Vehicle Year'].astype(int)})
    .assign(**{'Vehicle Age': lambda frame: 2024 - frame['Vehicle Year']})
)

# The ten most frequent makes (value_counts() orders by descending count)
top_makes = vehicle_data['Vehicle Make'].value_counts().iloc[:10]

# Horizontal bar chart: counts along x, make names along y
makes_bar = go.Bar(
    x=top_makes.to_numpy(),
    y=top_makes.index,
    orientation='h',
)
fig1 = go.Figure(data=[makes_bar])
fig1.update_layout(title='Top 10 Vehicle Makes Towed')
fig1.show()

# Histogram of vehicle age. Rows with a negative age (model year later than 2024)
# are filtered out before plotting.
non_negative_age = vehicle_data[vehicle_data['Vehicle Age'].ge(0)]
fig2 = px.histogram(
    non_negative_age,
    x='Vehicle Age',
    nbins=30,
    title='Vehicle Age When Towed',
)
fig2.show()

# Export both charts as standalone, interactive HTML files
fig1.write_html("top_vehicle_makes.html")  # bar chart of top makes
fig2.write_html("vehicle_age_histogram.html")  # vehicle age histogram



In [None]:
# Tally how often each value occurs in the 'Reason for tow' column.
# value_counts() yields a Series indexed by reason; it is unpacked into a two-column
# DataFrame so the columns have explicit names for plotting.
counts_by_reason = tows['Reason for tow'].value_counts()
reason_counts = pd.DataFrame({
    'Reason for tow': counts_by_reason.index,
    'Count': counts_by_reason.to_numpy(),
})

# Basic treemap: one block per reason.
basic_treemap = go.Treemap(
    labels=reason_counts['Reason for tow'],     # block label = the reason
    parents=[''] * reason_counts.shape[0],      # empty parent for every block -> flat, no hierarchy
    values=reason_counts['Count'],              # block area = number of tows
)
fig = go.Figure(data=[basic_treemap])

# Render the treemap
fig.show()

# Save the treemap as a standalone HTML file
fig.write_html("Treemap.html")

A more detailed version

In [None]:
# Number of tows per reason, as a two-column DataFrame ready for plotting
reason_counts = (
    tows['Reason for tow']
    .value_counts()
    .rename_axis('Reason for tow')   # name the index so it becomes a proper column
    .reset_index(name='Count')
)

# Treemap of towing reasons with in-block values and a custom hover box
detailed_treemap = go.Treemap(
    labels=reason_counts['Reason for tow'],
    parents=['' for _ in range(len(reason_counts))],  # every block sits at the top level
    values=reason_counts['Count'],                    # block area follows the tow count
    textinfo='label+value',                           # print label and count inside each block
    hovertemplate='<b>%{label}</b><br>Number of tows: %{value}<extra></extra>',
)
fig = go.Figure(data=[detailed_treemap])

# Title, fixed canvas size and a light theme
treemap_layout = {
    'title': 'Distribution of Towing Reasons',
    'width': 1000,
    'height': 600,
    'template': 'plotly_white',
}
fig.update_layout(**treemap_layout)

# Render the treemap
fig.show()


In [None]:
# Monthly tow counts, a linear trend fitted to them, and a 12-month projection.

# Remove rows where 'Tow Date' is missing.
# .copy() makes this an independent frame, so adding the 'YearMonth' column below
# does not raise pandas' SettingWithCopyWarning.
tows_projection_data = tows.dropna(subset=['Tow Date']).copy()

# Year-month period of each tow (e.g., 2023-07)
tows_projection_data['YearMonth'] = tows_projection_data['Tow Date'].dt.to_period('M')

# Count the number of tows per month.
# Only months that appear in the data get a row; a month without any tow is absent
# rather than counted as 0.
monthly_tows = tows_projection_data.groupby('YearMonth').size().reset_index(name='Tow Count')

# First day of each month as a datetime, used for the x-axis of the charts
monthly_tows['Month Start'] = monthly_tows['YearMonth'].dt.start_time

# Whole calendar months elapsed since the first month in the data.
# Computed from year/month components instead of `days // 30`: day-based division
# maps both February (31 days after 1 Jan) and March (59 days) to index 1 and drifts
# further over longer ranges.
first_month = monthly_tows['Month Start'].min()
monthly_tows['Months Since Start'] = (
    (monthly_tows['Month Start'].dt.year - first_month.year) * 12
    + (monthly_tows['Month Start'].dt.month - first_month.month)
)

# Regression inputs: month index (X) -> tow count (y)
X = monthly_tows[['Months Since Start']]  # Independent variable (kept 2-D for scikit-learn)
y = monthly_tows['Tow Count']             # Dependent variable

model = LinearRegression()
model.fit(X, y)

# Month indices for the 12 months following the latest month in the data, shape (12, 1)
last_month_index = int(X['Months Since Start'].max())
future_months = np.arange(last_month_index + 1, last_month_index + 13).reshape(-1, 1)

# Matching calendar dates. DateOffset(months=i) always lands on the first day of a
# month, whereas adding multiples of 30 days slides away from month starts.
last_month_start = monthly_tows['Month Start'].max()
future_month_dates = [last_month_start + pd.DateOffset(months=i) for i in range(1, 13)]

# Predict the number of tows for each future month.
# The indices are wrapped in a DataFrame with the training column name so that
# scikit-learn does not warn about missing feature names.
future_predictions = model.predict(pd.DataFrame(future_months, columns=X.columns))



In [None]:


# Observed monthly tow counts
actual_trace = go.Scatter(
    x=monthly_tows['Month Start'],
    y=monthly_tows['Tow Count'],
    mode='lines+markers',
    name='Actual',
)

# Regression output for the 12 months after the data ends
prediction_trace = go.Scatter(
    x=future_month_dates,
    y=future_predictions,
    mode='lines+markers',
    name='Prediction',
)

# Assemble both series into one figure
fig = go.Figure(data=[actual_trace, prediction_trace])

# Title and axis labels
basic_layout = {
    'title': 'Tow Count and Prediction',
    'xaxis_title': 'Month',
    'yaxis_title': 'Tow Count',
}
fig.update_layout(**basic_layout)

# Render the chart
fig.show()


A more detailed version

In [None]:


# One colour per series so markers and lines of the same series match
series_colors = {
    'actual': '#4c78a8',      # blue
    'trend': '#54a24b',       # green
    'projection': '#e45756',  # red
}

# Observed monthly tow counts: points joined by lines
actual_series = go.Scatter(
    x=monthly_tows['Month Start'],
    y=monthly_tows['Tow Count'],
    mode='markers+lines',
    name='Actual Data',
    marker={'color': series_colors['actual']},
    line={'color': series_colors['actual']},
)

# Fitted regression values over the months already in the data
trend_series = go.Scatter(
    x=monthly_tows['Month Start'],
    y=model.predict(X),
    mode='lines',
    name='Trend Line',
    line={'color': series_colors['trend']},
)

# Regression output for the months after the data ends, drawn dotted
projection_series = go.Scatter(
    x=future_month_dates,
    y=future_predictions,
    mode='markers+lines',
    name='Projection',
    marker={'color': series_colors['projection']},
    line={'color': series_colors['projection'], 'dash': 'dot'},
)

# Traces are drawn in list order: actual data, trend line, projection
fig = go.Figure(data=[actual_series, trend_series, projection_series])

# Titles, legend heading and a white theme
detailed_layout = {
    'title': 'Monthly Tow Count Trend and Future Projections',
    'xaxis_title': 'Month',
    'yaxis_title': 'Tow Count',
    'legend_title': 'Legend',
    'template': 'plotly_white',
}
fig.update_layout(**detailed_layout)

# Render the interactive plot
fig.show()


In [None]:
# The five 'Trade Name' values with the most rows in `tows`
# (value_counts() is already sorted by descending count)
top = tows['Trade Name'].value_counts().iloc[:5]

# Plain vertical bar chart: one bar per company
company_bars = go.Bar(x=top.index, y=top.to_numpy())
fig = go.Figure(data=[company_bars])

# Render the chart
fig.show()



In [None]:
# The five 'Trade Name' values with the most rows in `tows`
top = tows['Trade Name'].value_counts().iloc[:5]

# One bar per company, each with its own colour taken from the start of the T10 palette
bar_colors = px.colors.qualitative.T10[:5]
colored_bars = go.Bar(
    x=top.index,
    y=top.to_numpy(),
    marker={'color': bar_colors},
)
fig = go.Figure(data=[colored_bars])

# Chart title and axis labels
bar_layout = {
    'title': 'Top 5 Towing Companies',
    'xaxis_title': 'Company',
    'yaxis_title': 'Number of Tows',
}
fig.update_layout(**bar_layout)

# Render the chart
fig.show()


In [None]:
# Line chart of the monthly average of daily tow counts for the five busiest companies.

# Step 1: Identify the top 5 most frequent towing companies
top_companies = tows['Trade Name'].value_counts().head(5).index

# Step 2: Keep only rows related to those top 5 companies
top_data = tows[tows['Trade Name'].isin(top_companies)]

# Step 3: Count how many tows each company had per 'Tow Date' value.
# unstack() turns companies into columns; a company with no tow on a date that
# appears in the data gets 0 instead of NaN.
# NOTE(review): this is "per day" only if 'Tow Date' carries no time-of-day part — confirm.
daily_counts = top_data.groupby(['Tow Date', 'Trade Name']).size().unstack(fill_value=0)

# Step 4: Average those per-date counts within each calendar month.
# 'ME' (month-end) is the current alias; the older 'M' spelling is deprecated in
# recent pandas, and 'ME' matches the detailed version of this chart.
monthly_avg = daily_counts.resample('ME').mean()

# Step 5: Start from a fresh, empty figure. Without this the lines below would be
# added on top of whatever `fig` the previously executed cell left behind.
fig = go.Figure()

# Step 6: Add a line for each company to the plot
for company in monthly_avg.columns:
    fig.add_trace(go.Scatter(
        x=monthly_avg.index,         # Month-end dates on x-axis
        y=monthly_avg[company],      # Average daily tows on y-axis
        mode='lines+markers',        # Show both dots and lines
        name=company                 # Label for the legend
    ))

# Step 7: Customize the layout of the chart
fig.update_layout(
    title='Monthly Average Tow Count (Top 5 Companies)',  # Chart title
    xaxis_title='Month',                                  # X-axis label
    yaxis_title='Average Daily Tows',                     # Y-axis label
    template='plotly_white'                               # Light, clean look
)

# Step 8: Show the chart
fig.show()



In [None]:
# Names of the five companies with the most rows in `tows`
top_companies = tows['Trade Name'].value_counts().head(5).index

# Rows that belong to one of those five companies
is_top_company = tows['Trade Name'].isin(top_companies)
top_companies_data = tows[is_top_company]

# Tows per ('Tow Date', company) pair, pivoted so each company is a column;
# missing date/company combinations become 0
tows_per_date_and_company = top_companies_data.groupby(['Tow Date', 'Trade Name']).size()
top_five_daily_counts = tows_per_date_and_company.unstack(fill_value=0)

# Mean of the per-date counts within each calendar month ('ME' = month-end bins)
monthly_avg = top_five_daily_counts.resample('ME').mean()

# Start from an empty figure
fig = go.Figure()

# One line per company; colours are taken from the shared palette and wrap around
# if there are more columns than colours
for position, (company, company_avg) in enumerate(monthly_avg.items()):
    company_line = go.Scatter(
        x=monthly_avg.index,
        y=company_avg,
        mode='lines+markers',
        name=company,
        line={'color': colors[position % len(colors)]},
        hovertemplate='<b>%{x}</b><br>Average: %{y:.2f}<extra></extra>',  # date + 2-decimal average
    )
    fig.add_trace(company_line)

# Titles, legend heading, theme and canvas size
fig.update_layout(
    title='Monthly Average Daily Tow Counts for Top 5 Companies',
    xaxis_title='Month',
    yaxis_title='Average Daily Tow Count',
    legend_title='Company',
    template='plotly_white',
    width=1000,
    height=800,
)

# Zoom controls: a range slider under the plot plus preset range buttons
range_buttons = [
    dict(count=1, label='1m', step='month', stepmode='backward'),
    dict(count=6, label='6m', step='month', stepmode='backward'),
    dict(count=1, label='YTD', step='year', stepmode='todate'),
    dict(count=1, label='1y', step='year', stepmode='backward'),
    dict(step='all'),
]
fig.update_xaxes(
    rangeslider_visible=True,
    rangeselector=dict(buttons=range_buttons),
)

# Render the interactive plot
fig.show()


In [None]:
# Difference between each month's average and the previous month's, per company.
# The first row has no predecessor, so diff() leaves it as NaN.
monthly_change = monthly_avg.diff()

# One line trace per company column
change_lines = [
    go.Scatter(
        x=monthly_change.index,
        y=monthly_change[company],
        mode='lines',
        name=company,
    )
    for company in monthly_change.columns
]
fig = go.Figure(data=change_lines)

# Title and axis labels
change_layout = {
    'title': 'Monthly Change in Average Tow Counts',
    'xaxis_title': 'Month',
    'yaxis_title': 'Change in Tows',
}
fig.update_layout(**change_layout)

# Render the chart
fig.show()


More Detailed Version

In [None]:
# Month-over-month difference in the average daily tows of each company
# (absolute change; the first month is NaN because it has no previous month)
monthly_rate_of_change = monthly_avg.diff()

# One line per company, coloured from the shared palette (wrapping around if needed)
rate_lines = []
for position, company in enumerate(monthly_rate_of_change.columns):
    rate_lines.append(go.Scatter(
        x=monthly_rate_of_change.index,
        y=monthly_rate_of_change[company],
        name=company,
        mode='lines',
        line={'color': colors[position % len(colors)]},
    ))
fig = go.Figure(data=rate_lines)

# Titles, canvas size, legend and theme
rate_layout = {
    'title': 'Month-Over-Month Change in Tow Counts (Top 5 Companies)',
    'xaxis_title': 'Month',
    'yaxis_title': 'Change in Avg Daily Tows',
    'width': 820,
    'height': 580,
    'showlegend': True,
    'legend_title': 'Towing Company',
    'template': 'plotly_white',
}
fig.update_layout(**rate_layout)

# Axis styling: both axes get a 2px black axis line; only the y-axis shows a grid
axis_line_style = {'linecolor': 'black', 'linewidth': 2}
fig.update_xaxes(showgrid=False, **axis_line_style)
fig.update_yaxes(showgrid=True, gridcolor='LightGray', **axis_line_style)

# Thin horizontal reference line at zero (no change)
fig.add_hline(y=0, line_width=1, line_color='black')

# Render the figure
fig.show()


In [None]:
# One row per 'TractFIPS': mean income, mean population density, mean CEI, and the
# number of non-null 'geoid' entries, which is used as the tow count for the tract.
# Named aggregation gives the count column its final name directly.
grouped = (
    tows
    .groupby('TractFIPS')
    .agg(
        median_household_income=('median_household_income', 'mean'),
        pop_density=('pop_density', 'mean'),
        cei=('cei', 'mean'),
        number_of_tows=('geoid', 'count'),
    )
    .reset_index()
)

# Basic bubble chart: income vs. density, bubble size = tows, bubble colour = CEI
bubble_options = {
    'x': 'median_household_income',
    'y': 'pop_density',
    'size': 'number_of_tows',
    'color': 'cei',
    'title': 'Tows by Income and Population Density',
}
fig = px.scatter(grouped, **bubble_options)

# Render the chart
fig.show()


In [None]:
# Summarise `tows` per geographic tract ('TractFIPS').
# Montgomery County (MC) is divided into 148 census tracts.
tract_aggregations = {
    'median_household_income': 'mean',  # average income per tract
    'pop_density': 'mean',              # average population density per tract
    'cei': 'mean',                      # average Community Equity Index per tract
    'geoid': 'count',                   # non-null rows per tract (assuming each row is one tow)
}
grouped_data = (
    tows
    .groupby('TractFIPS')
    .agg(tract_aggregations)
    .reset_index()
    .rename(columns={'geoid': 'number_of_tows'})  # the count column is really a tow count
)

# Hover formatting: thousands separators for income/density, two decimals for CEI
hover_formats = {
    'median_household_income': ':,.0f',
    'pop_density': ':,.0f',
    'number_of_tows': True,
    'cei': ':.2f',
}

# Human-readable names shown on axes, legend and hover box
display_labels = {
    'median_household_income': 'Median Household Income',
    'pop_density': 'Population Density',
    'number_of_tows': 'Number of Tows',
    'cei': 'Community Equity Index',
}

# Bubble chart: x = income, y = density, size = tow count, colour = CEI
fig = px.scatter(
    grouped_data,
    x='median_household_income',
    y='pop_density',
    size='number_of_tows',
    color='cei',
    color_continuous_scale='RdYlBu',   # Red-Yellow-Blue colour scale
    size_max=50,                       # cap on the largest bubble's size
    hover_data=hover_formats,
    template='simple_white',
    title='Income vs Population Density with Number of Towing Incidents',
    labels=display_labels,
)

# Layout polish: white plot area, fixed canvas, colour-bar title, larger chart title
layout_tweaks = {
    'plot_bgcolor': 'white',
    'width': 850,
    'height': 550,
    'coloraxis_colorbar_title': 'CEI',
    'title_font_size': 18,
}
fig.update_layout(**layout_tweaks)

# Thin black outline around every bubble so overlapping bubbles stay distinguishable
bubble_outline = {'line': {'color': 'black', 'width': 1}}
fig.update_traces(marker=bubble_outline)

# Render the chart
fig.show()
