<a href="https://colab.research.google.com/github/oceane0815/Pink_Tax/blob/main/GT_summarystats_linegraph.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [54]:
import pandas as pd

%cd /content/Pink_Tax/

###################### Descrpitive Statistics for Google Trends Data #############################
# URL of the merged CSV file in your GitHub repository
url = 'https://raw.githubusercontent.com/oceane0815/Pink_Tax/main/google_trends_merged_updated.csv'

# Read the merged CSV file
df = pd.read_csv(url)

# summary statistics
summary_stats = df.drop(columns='Month').describe().T

# Sort the summary_stats based on the 'mean' column
sorted_summary_stats = summary_stats.sort_values(by='mean', ascending=False)
print(sorted_summary_stats)

# Export to Latex code
latex_summary_stats = sorted_summary_stats.to_latex()
print(latex_summary_stats)

# Save the LaTeX code to a .tex file
with open('summary_stats_table.tex', 'w') as f:
    f.write(latex_summary_stats)

print("LaTeX table saved as 'summary_stats_table.tex'")
from google.colab import files

# Download the .tex file from Colab's file system
files.download('/content/Pink_Tax/summary_stats_table.tex')


####################### Line Graph for Cumulative Google Trends Data ##############################
import matplotlib.pyplot as plt

# URL of the cumulative-sum CSV file in the GitHub repository
url_cum = 'https://raw.githubusercontent.com/oceane0815/Pink_Tax/main/cumulative_sum_results_updated.csv'

# Read the cumulative CSV file
df_cum = pd.read_csv(url_cum)

# Parse df_cum's own 'Month' column as datetimes and use it as the plotting index.
# The dates must come from df_cum itself: a Series taken from a different
# DataFrame is aligned on row labels when assigned, so any difference in rows
# between the two files would silently yield wrong or missing (NaT) dates.
# NOTE(review): assumes the cumulative CSV contains a 'Month' column - confirm.
df_cum['Month'] = pd.to_datetime(df_cum['Month'])
df_cum.set_index('Month', inplace=True)

# Remove the "_Cumulative" suffix from the column names so they match the
# row labels of the summary-statistics table
df_cum.columns = df_cum.columns.str.replace('_Cumulative', '', regex=False)

# Row labels of the summary statistics, already sorted by descending mean
sorted_states = sorted_summary_stats.index

# Split the ranking into three consecutive groups. The last slice takes every
# remaining label, which is exactly 17 when there are 51 columns in total.
group_size = 17
top_17_states = sorted_states[:group_size]                   # Highest means
middle_17_states = sorted_states[group_size:2 * group_size]  # Middle of the ranking
lowest_17_states = sorted_states[2 * group_size:]            # Lowest means

# Select the cumulative series belonging to each group
top_17_df = df_cum[top_17_states]
middle_17_df = df_cum[middle_17_states]
lowest_17_df = df_cum[lowest_17_states]
print(top_17_df)
print(middle_17_df)
print(lowest_17_df)

def _plot_state_group(group_df, title: str, legend_loc: str, filename: str) -> None:
    """Draw one line per column of *group_df*, save the figure, and show it.

    Args:
        group_df: DataFrame whose index supplies the x values and whose
            columns are each plotted as a separate labelled line.
        title: Title placed above the axes.
        legend_loc: Matplotlib legend location string (e.g. 'upper left').
        filename: Path the figure is written to.
    """
    plt.figure(figsize=(12, 8))
    for state in group_df.columns:
        plt.plot(group_df.index, group_df[state], label=state)
    plt.xlabel('Month')
    plt.ylabel('Cumulative Google Trends Value')
    plt.title(title)
    plt.legend(loc=legend_loc, ncol=2)

    # Tighten the layout and save before showing, so the file is written
    # from the fully drawn figure.
    plt.tight_layout()
    plt.savefig(filename)
    plt.show()


# One figure per group; the three figures share everything except their
# data, title, legend position, and output file.
_plot_state_group(
    top_17_df,
    'Top 17 States by Mean Google Trends Value',
    'upper left',
    'fig_cum_top_17_states.png',
)
_plot_state_group(
    middle_17_df,
    'Middle 17 States by Mean Google Trends Value',
    'upper right',
    'fig_cum_middle_17_states.png',
)
_plot_state_group(
    lowest_17_df,
    'Lowest 17 States by Mean Google Trends Value',
    'upper right',
    'fig_cum_lowest_17_states.png',
)

# Send each saved figure to the browser, in the order the figures were created
for figure_name in (
    'fig_cum_top_17_states.png',
    'fig_cum_middle_17_states.png',
    'fig_cum_lowest_17_states.png',
):
    files.download(figure_name)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>