In [6]:
import dash
from dash import dcc, html, Input, Output
import plotly.express as px
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import os
from scipy import stats
from IPython.display import display


In [7]:
# Load the climate-influencer table and discard its 'Date' column in a single
# chained expression, so re-running the cell always yields the same frame.
climate = pd.read_csv('climate_influencers.csv').drop(columns=['Date'])


In [8]:

# Folder holding one CSV per state; the upper-cased file stem is used as the
# state's column name in the merged table.
states_folder = 'states'

# Per-state DataFrames keyed by that upper-cased name.
state_dfs = {}

# os.listdir returns entries in arbitrary order. Sorting keeps the column order
# of merged_df (and therefore the dashboard's default state, which is taken
# from the first state column) stable between runs and machines.
for filename in sorted(os.listdir(states_folder)):
    if not filename.endswith('.csv'):
        continue

    state_name = os.path.splitext(filename)[0].upper()
    state_df = pd.read_csv(os.path.join(states_folder, filename))

    # Publish 'MeanPoints' under the state's name and keep only the join keys
    # plus that column, so the frames can be merged side by side.
    state_df = state_df.rename(columns={'MeanPoints': state_name})
    state_dfs[state_name] = state_df[['Year', 'Period', state_name]]

# Fail with a clear message instead of an IndexError when nothing was loaded.
if not state_dfs:
    raise FileNotFoundError(f"No .csv files found in '{states_folder}'")

# Outer-join every state on (Year, Period) so periods missing from one state
# are kept as NaN rather than dropped.
state_names = list(state_dfs)
merged_df = state_dfs[state_names[0]]
for state in state_names[1:]:
    merged_df = pd.merge(merged_df, state_dfs[state], on=['Year', 'Period'], how='outer')

# Attach the climate indices. With no `on=` pandas joins on every column name
# the two frames share (default inner join).
# NOTE(review): presumably only 'Year' and 'Period' are shared - confirm
# against climate_influencers.csv.
merged_df = pd.merge(merged_df, climate)

# Sort chronologically using a temporary datetime column built from
# Year + abbreviated month name, then remove the helper column again.
merged_df['Date'] = pd.to_datetime(merged_df['Year'].astype(str) + '-' + merged_df['Period'], format='%Y-%b')
merged_df = merged_df.sort_values('Date')
merged_df = merged_df.drop(columns=['Date'])

In [9]:
# Optional export of the merged table (disabled); uncomment to write it to disk.
# merged_df.to_csv('merged_state_climate.csv')

In [10]:
# Create a datetime column from Year + abbreviated month name
merged_df['Date'] = pd.to_datetime(merged_df['Year'].astype(str) + '-' + merged_df['Period'], format='%Y-%b')

app = dash.Dash(__name__)

# Inline styles. Dash `style` dictionaries use camelCased CSS property names
# (fontFamily, marginRight), not the hyphenated CSS spelling.
label_style = {'fontFamily': 'Arial, sans-serif'}
custom_style = {
    'dropdown': {
        'fontFamily': 'Arial, sans-serif',
        'width': '200px',
        'marginRight': '10px'
    }
}

# Marker colour per state
color_map = {
    'QLD': '#00208F',
    'NSW': '#A90018',
    'SA': '#1EBA9F',
    'VIC': '#FA2449',
    'NT': '#FFA500',
    'WA': '#0036F9'
}

# Every column that is not a join key, a climate index, or the helper Date
# column is treated as a state. Options and the default value are both derived
# from this one list, so the default no longer depends on column position.
non_state_columns = ['Year', 'Period', 'ENSO', 'SAM', 'IOD', 'Date']
state_columns = [column for column in merged_df.columns if column not in non_state_columns]
climate_columns = ['ENSO', 'SAM', 'IOD']

app.layout = html.Div([
    dcc.Graph(id='scatter-plot', style={'height': '80vh'}, config={'scrollZoom': False}),
    html.Div([
        # State selector, overlaid on the top-left corner of the graph
        html.Div([
            html.Label("Select State:", style=label_style),
            dcc.Dropdown(
                id='state-dropdown',
                options=[{'label': state, 'value': state} for state in state_columns],
                value=state_columns[0] if state_columns else None,  # Default to the first state
                clearable=False,  # the plot always needs a state; a cleared value would be None
                style=custom_style['dropdown']
            ),
        ], style={'position': 'absolute', 'top': '50px', 'left': '60px', 'zIndex': 1000}),
        # Climate-influencer selector, overlaid on the top-right corner
        html.Div([
            html.Label("Select Climate Influencer:", style=label_style),
            dcc.Dropdown(
                id='climate-dropdown',
                options=[{'label': influencer, 'value': influencer} for influencer in climate_columns],
                value='ENSO',  # Default to ENSO
                clearable=False,  # same reason as the state selector
                style=custom_style['dropdown']
            ),
        ], style={'position': 'absolute', 'top': '50px', 'right': '60px', 'zIndex': 1000})
    ])
])

@app.callback(
    Output('scatter-plot', 'figure'),
    [Input('state-dropdown', 'value'),
     Input('climate-dropdown', 'value')]
)
def update_graph(selected_state, selected_climate):
    """Build the scatter plot of a state's mean points against a climate index.

    Rows where either selected column is NaN are ignored. When at least two
    distinct x values remain, a least-squares line is drawn and its R-squared
    and p-value are shown in the title; otherwise the title says that no
    regression could be computed.

    Args:
        selected_state: Column name chosen in the state dropdown, or None if
            the dropdown has been cleared.
        selected_climate: Column name chosen in the climate dropdown, or None
            if the dropdown has been cleared.

    Returns:
        The Plotly figure for the 'scatter-plot' graph, or ``dash.no_update``
        when either selection is missing so the current figure stays on screen.
    """
    # A cleared dropdown reports None, which cannot be used as a column name.
    if selected_state is None or selected_climate is None:
        return dash.no_update

    # Filter out rows with NaN values
    filtered_df = merged_df.dropna(subset=[selected_state, selected_climate])

    axis_labels = {selected_state: f"{selected_state} Mean Points",
                   selected_climate: selected_climate}
    fig = px.scatter(filtered_df, x=selected_state, y=selected_climate,
                     hover_data=['Year', 'Period'],
                     labels=axis_labels)

    # Style the scatter points before the fit line exists, so the hover
    # template (which reads Year/Period from customdata) is not applied to the
    # line trace, which has no customdata. States without an entry in color_map
    # get None, i.e. Plotly's default marker colour, instead of a KeyError.
    fig.update_traces(
        marker=dict(color=color_map.get(selected_state)),
        hovertemplate="<b>Date</b>: %{customdata[0]}-%{customdata[1]}<br>" +
                      f"<b>{selected_state} Mean Points</b>: %{{x:.2f}}<br>" +
                      f"<b>{selected_climate}</b>: %{{y:.2f}}<extra></extra>"
    )

    # Add regression line and calculate statistics
    x = filtered_df[selected_state]
    y = filtered_df[selected_climate]
    # linregress raises on empty input and when all x values are identical.
    if len(x) >= 2 and x.nunique() > 1:
        slope, intercept, r_value, p_value, _ = stats.linregress(x, y)
        r_squared = r_value**2
        line_x = np.array([x.min(), x.max()])
        line_y = slope * line_x + intercept

        fit_trace = px.line(x=line_x, y=line_y).data[0]
        fit_trace.hovertemplate = (
            "<b>Linear fit</b><br>" +
            f"<b>{selected_state} Mean Points</b>: %{{x:.2f}}<br>" +
            f"<b>{selected_climate}</b>: %{{y:.2f}}<extra></extra>"
        )
        fig.add_trace(fit_trace)

        # \u00b2 is the superscript-two character, written as an escape so the
        # title cannot be corrupted by a file-encoding mismatch.
        stats_text = f"R\u00b2 = {r_squared:.4f}, p-value = {p_value:.4f}"
    else:
        stats_text = "Not enough data for a regression"

    # Axis styling shared by both axes
    tick_font = dict(family='Arial, sans-serif', size=12, color='rgb(82, 82, 82)')
    shared_axis = dict(
        showline=True,
        showticklabels=True,
        linecolor='rgb(204, 204, 204)',
        linewidth=2,
        ticks='outside',
        tickfont=tick_font,
    )

    # Update layout for white background and other improvements
    fig.update_layout(
        autosize=True,
        plot_bgcolor='white',
        paper_bgcolor='white',
        title={
            'text': f"{selected_state} Mean Points vs {selected_climate}<br>{stats_text}",
            'y': 0.96,
            'x': 0.5,
            'xanchor': 'center',
            'yanchor': 'top',
            'font': dict(family="Arial, sans-serif", size=18)
        },
        font=dict(family="Arial, sans-serif"),
        xaxis=dict(title=f"{selected_state} Mean Points", showgrid=False, **shared_axis),
        yaxis=dict(title=selected_climate, showgrid=True,
                   gridcolor='rgb(235, 235, 235)', **shared_axis),
        margin=dict(t=100, b=50, l=60, r=60)  # Increased top margin to accommodate the two-line title
    )

    return fig


if __name__ == '__main__':
    # Start the development server on port 8054 with debug tooling enabled.
    # `run` is the current launcher name; `run_server` is its legacy spelling
    # and is only looked up when `run` is absent (older Dash installs).
    launch = app.run if hasattr(app, 'run') else app.run_server
    launch(debug=True, port=8054)