```markdown
Assignment 2
Nazmul Hasan Rabbi
CISC 0672: Data Visualization
```


## Project Setup
Import the packages required for this assignment.


In [1]:
# importing packages
import pandas as pd
import numpy as np
import calendar
import plotly.graph_objects as go
import plotly.express as px

### Exercise 1
Go to https://www.ncdc.noaa.gov/cag/city/time-series/ and retrieve time-series temperature data for four to six cities of your choice.

Use Excel to combine these datasets into a single file *cityTempsSeries.csv* with temperature data for your cities. This has the same format as the file read at the top of this notebook, but it has data for your chosen cities.

Then use the techniques used in this notebook above to create a dataframe with the format given above.

In [2]:
# Load the combined city temperature table (Jan 2001 - Apr 2024); the per-city files were merged in Excel
df = pd.read_csv(filepath_or_buffer='./data/cityTempsSeries.csv')

# Preview the first five rows of the dataset
df.head(n=5)

Unnamed: 0,Date,Charlotte,Columbus,Fresno,Honolulu
0,200101,41.8,28.4,46.6,76.3
1,200102,48.9,35.1,49.1,75.1
2,200103,49.8,37.8,59.1,77.1
3,200104,62.4,56.5,59.0,78.5
4,200105,69.3,63.3,77.7,79.4


In [3]:
# Replace the YYYYMM Date column with separate Year and Month columns.
# The guard keeps this cell idempotent: once Date has been replaced, running
# the cell again skips the conversion instead of failing on the missing column.
if 'Date' in df.columns:
    # Parse the YYYYMM values into timestamps
    dates = pd.to_datetime(df['Date'], format='%Y%m')

    # Remaining columns (the cities), kept in their original order
    city_cols = [col for col in df.columns if col not in ('Date', 'Year', 'Month')]

    # Rebind df to a new frame (no in-place mutation): Year, Month, then the cities
    df = df.drop(columns='Date').assign(
        Year=dates.dt.year,
        Month=dates.dt.month.map(lambda m: calendar.month_abbr[m]),
    )[['Year', 'Month'] + city_cols]

# See the last few rows of the updated dataset
df.tail()

Unnamed: 0,Year,Month,Charlotte,Columbus,Fresno,Honolulu
275,2023,Dec,47.7,41.8,53.2,76.8
276,2024,Jan,43.4,31.8,51.5,76.2
277,2024,Feb,49.8,40.6,54.1,75.8
278,2024,Mar,56.2,46.6,57.8,76.3
279,2024,Apr,63.4,56.9,63.1,78.7


### Exercise 2
For this exercise, define the function

    plot_one_month(frame, month, cities)

In [4]:
def plot_one_month(df, month, cities):
    """Show a line chart of one month's temperature across years, one line per city.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a 'Year' column, a 'Month' column and one column per city.
    month : str
        Value to match in the 'Month' column (e.g. 'Mar').
    cities : iterable of str
        City column names to plot; names that are not columns of ``df`` are skipped.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``. If no row matches ``month``,
        a message is printed and nothing is plotted.
    """
    # Filter DataFrame for the given month
    month_df = df[df['Month'] == month]

    # Nothing to plot: report it the same way the other plotting helpers do
    if month_df.empty:
        print(f"No data for month {month}")
        return

    # Take the year span from the rows actually plotted instead of hard-coding it;
    # the available years can differ from month to month.
    first_year, last_year = month_df['Year'].min(), month_df['Year'].max()
    span = f'{first_year}-{last_year}' if first_year != last_year else f'{first_year}'

    # Create a figure with a line for each requested city that exists in the frame
    fig = go.Figure([
        go.Scatter(x=month_df['Year'], y=month_df[city], mode='lines', name=city)
        for city in cities
        if city in month_df.columns
    ])

    # Update the layout of the figure with title, axis labels, and legend title
    fig.update_layout(
        title={'text': f'Temperature over Years in {month} ({span})', 'y': 0.9, 'x': 0.5, 'xanchor': 'center', 'yanchor': 'top'},
        xaxis_title="Year",
        yaxis_title="Temperature (°F)",
        legend_title="Cities",
    )

    # Display the figure
    fig.show()

# Demo: March temperatures across the years for Charlotte, Columbus, Fresno, and Honolulu
plot_one_month(df, month='Mar', cities=['Charlotte', 'Columbus', 'Fresno', 'Honolulu'])

### Exercise 3
For this exercise, write a version of function `barplot_year_month` that generates a bar graph whose bars are ordered by descending height (from warmest to coldest).

    barplot_year_month(frame, 1980, 'Jan')

In [5]:
def barplot_year_month(df, year, month):
    """Bar chart of each city's temperature for one year/month, warmest city first.

    Columns from the third one onward are treated as cities. When no row
    matches ``year`` and ``month``, a message is printed and None is returned;
    otherwise the Plotly figure is returned.
    """
    # City columns are everything after the first two columns
    city_names = df.columns[2:]

    # Rows for the requested year and month, restricted to the city columns
    wanted = (df.Year == year) & (df.Month == month)
    rcd = df.loc[wanted, city_names]

    # Bail out with a message when nothing matched
    if rcd.empty:
        print(f"No data for year {year} and month {month}")
        return None

    # One row per city (taken from the first matching record), warmest to coldest
    ranked = pd.DataFrame({'cities': city_names, 'temps': rcd.values[0]})
    ranked = ranked.sort_values('temps', ascending=False)

    # Draw the bars, then set the title and axis labels
    fig = px.bar(ranked, x='cities', y='temps')
    fig.update_layout(title_text=f'{month} {year} Average Temperatures by City', title_x=0.5, legend_title_text='cities')
    fig.update_yaxes(title_text='Temperature (°F)')
    fig.update_xaxes(title_text='Cities')

    # Return the plot
    return fig

# Demo: city temperatures for December 2017, warmest first
barplot_year_month(df, year=2017, month='Dec')

### Exercise 4
Write the function `dotplot_one_year(frame, year)` that returns the figure for such a plot.

In [6]:
def dotplot_one_year(df, year):
    """Dot plot of every temperature recorded in one year, one column of dots per city.

    Columns from the third one onward are treated as cities. When no row
    matches ``year``, a message is printed and None is returned; otherwise the
    Plotly figure is returned.
    """
    # Keep only the rows of the requested year
    year_rows = df.loc[df.Year == year]

    # Bail out with a message when the year is absent
    if year_rows.empty:
        print(f"No data for year {year}")
        return None

    # Each city gets one marker per row, all placed at the city's x position
    n_points = len(year_rows)
    fig = go.Figure()
    for city in df.columns[2:]:
        dots = go.Scatter(
            x=[city] * n_points,
            y=year_rows[city],
            mode='markers',
            name=city,
            marker=dict(size=12),
        )
        fig.add_trace(dots)

    # Axis labels
    fig.update_xaxes(title_text='Cities')
    fig.update_yaxes(title_text='Temperature (°F)')

    # Centered title; the x axis already names the cities, so hide the legend
    fig.update_layout(title_text=f'Temperatures for Year {year}', title_x=0.5, showlegend=False)

    return fig

# Demo: dot plot of all temperatures recorded in 2001
dotplot_one_year(df, year=2001)

### Exercise 5
Write the function `scatterplot_one_month(df, month)` that generates a scatterplot for each city on the same plot, which includes a best-fit line for each city.

In [7]:
def scatterplot_one_month(df, month):
    """Scatter one month's temperatures against year for every city, with a linear fit per city.

    Columns from the third one onward are treated as cities. When no row
    matches ``month``, a message is printed and None is returned; otherwise the
    Plotly figure is returned.
    """
    # Rows belonging to the requested month
    df_month = df.loc[df.Month == month]
    if df_month.empty:
        print(f"No data for month {month}")
        return None

    # Colors are reused cyclically if there are more cities than entries
    palette = ['blue', 'orange', 'green', 'red', 'purple', 'brown', 'pink', 'gray', 'olive', 'cyan']
    years = df_month.Year

    fig = go.Figure()
    for idx, city in enumerate(df.columns[2:]):
        temps = df_month[city]
        shade = palette[idx % len(palette)]

        # Degree-1 least-squares fit, evaluated at the observed years
        trend = np.polyval(np.polyfit(years, temps, 1), years)

        # Markers carry the legend entry; the fit line shares the color but stays out of the legend
        markers = go.Scatter(x=years, y=temps, mode='markers', name=city, marker_color=shade)
        fit_line = go.Scatter(x=years, y=trend, mode='lines', showlegend=False, line_color=shade)
        fig.add_trace(markers)
        fig.add_trace(fit_line)

    # Title, legend and axis labels
    fig.update_layout(title_text=f'Temperatures for Month {month}', title_x=0.5, showlegend=True, legend_title_text='Cities')
    fig.update_xaxes(title_text='Year')
    fig.update_yaxes(title_text='Temperature (°F)')

    return fig

# Demo: January scatterplot with a best-fit line for every city
scatterplot_one_month(df, month='Jan')

### Exercise 6

Define a function `plot_primes(m, n)` that plots a matrix heatmap of an $m\times n$ array $P$ where entry $P[i][j]$ has one color if the integer $p = n*i+j$ is prime, a different color if $p$ is composite. Two examples:

    plot_primes(10, 10)
    plot_primes(20, 20, color_sequence=px.colors.qualitative.Dark24)

In [8]:
# provided code
def is_primes_upto(n):
    """Return a boolean array of length n + 1 whose entry k is True when k is prime.

    Entries 0 and 1 are set to False, then for each candidate factor the
    multiples starting at twice the factor are cleared.
    """
    sieve = np.full(n + 1, True, dtype=bool)
    sieve[0:2] = False

    # Candidate factors run from 2 up to int(sqrt(array length)) inclusive
    upper = int(np.sqrt(sieve.size)) + 1
    factor = 2
    while factor < upper:
        # Clear 2*factor, 3*factor, ... (the factor itself is left untouched)
        sieve[2 * factor::factor] = False
        factor += 1

    return sieve

In [9]:
def plot_primes(m, n, color_sequence=['RoyalBlue', 'Gold']):
    """Display an m-by-n heatmap marking which of the integers 0 .. m*n - 1 are prime.

    Cell (i, j) stands for the integer n*i + j. ``color_sequence`` is handed to
    ``px.imshow`` as its ``color_continuous_scale``. The figure is shown with
    ``fig.show()``; nothing is returned.
    """
    cell_count = m * n

    # The sieve returns flags for 0..cell_count; keep the first m*n and lay them out row by row
    flags = is_primes_upto(cell_count)[:cell_count]
    grid = flags.reshape((m, n))

    # Heatmap colored by prime status
    fig = px.imshow(grid, color_continuous_scale=color_sequence)

    # No color bar, fixed size, tight margins
    fig.update_layout(
        coloraxis_showscale=False,
        autosize=False,
        margin={'l': 0, 'r': 0, 't': 10, 'b': 10},
    )

    # Display the plot
    fig.show()

# Demo: a 10x10 grid with the default colors, then a 20x20 grid with a custom color sequence
plot_primes(m=10, n=10)
plot_primes(m=20, n=20, color_sequence=px.colors.qualitative.Dark24)