# Economic Data Analysis | Day 1 Demo

Let's get to know the class!

In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

# Base location of the survey CSVs: file names are appended directly to this
# string, so it must end with a slash. The hosted URL is kept as an alternative
# source; note that the cleaning cell also writes its output under file_path.
#file_path = 'https://tayweid.github.io/econ-0150/concepts/concept-0/data/'
file_path = 'data/'

## Data Cleaning

In [2]:
# Peek at the raw survey export before any cleaning is applied.
raw_survey_csv = file_path + 'Fall_2025_Survey_raw.csv'
data = pd.read_csv(raw_survey_csv)
data.head()

Unnamed: 0,Timestamp,First and Last Initials (eg. Taylor Weidman -> TW),When is your birthday?,What is your favorite color?,Approximately how many miles away from Pittsburgh is your hometown?,What is your (primary) major?,How much did you like your statistics class?,How excited are you for this class?,Do you feel confident in Excel?,How confident do you feel in python?
0,2025/08/26 1:03:40 PM AST,JC,2003-10-12,Blue,400,Accounting,7,9,Yes,10
1,2025/08/26 1:48:49 PM AST,TL,2003-06-06,Blue,16,Other,7,7,No,5
2,2025/08/26 1:49:32 PM AST,LT,2005-07-06,Green,300,Data Science,9,8,Yes,9
3,2025/08/26 1:50:09 PM AST,KM,2004-10-04,Blue,300,Computer Science,8,9,Yes,8
4,2025/08/26 1:50:18 PM AST,DC,2006-09-09,Melon Green,400,Finance,9,10,Yes,9


In [3]:
# Clean the raw survey export and save the result as Fall_2025_Survey.csv.
data = pd.read_csv(file_path + 'Fall_2025_Survey_raw.csv')

# Date: parse the birthday column once, then split out the year and the month
birthday = pd.to_datetime(data['When is your birthday?'])
data['When is your birthyear?'] = birthday.dt.year
data['When is your birthmonth?'] = birthday.dt.month

# Color: exact-match replacements; answers not listed here pass through unchanged
color_mapping = {
    # Standardize capitalization
    'blue': 'Blue',
    'purple': 'Purple',
    'green': 'Green',
    'red': 'Red',
    'yellow': 'Yellow',
    'pink': 'Pink',
    'orange': 'Orange',
    'black': 'Black',

    # Fix typos
    'Greeen': 'Green',

    # Group light/dark variations with main color
    'Light Blue': 'Blue',
    'Dark blue': 'Blue',
    'Navy blue': 'Blue',
    'Navy Blue': 'Blue',
    'Baby Blue': 'Blue',

    'Dark Green': 'Green',
    'Dark green': 'Green',
    'dark green': 'Green',
    'Melon Green': 'Green',  # Assuming this is a green shade

    'Deep Red': 'Red',

    # Standardize other colors
    'white': 'White',
    'Grey': 'Gray',  # or keep as 'Grey' if you prefer
    'Gold': 'Yellow',  # Gold is yellowish

    # Handle the complex answer - black is the first choice, green only the backup
    'Black (if that doesn’t count then probably green)': 'Black',
}
color_col = 'What is your favorite color?'
data[color_col] = data[color_col].replace(color_mapping)

# Distance: keep the first number found in each free-text answer
# (e.g. '300 miles' -> 300.0); answers containing no digits become NaN.
# NOTE: a thousands separator ends the match, so '1,000' would parse as 1.0.
distance_col = 'Approximately how many miles away from Pittsburgh is your hometown?'
data[distance_col] = data[distance_col].str.extract(r'(\d+(?:\.\d+)?)', expand=False).astype(float)

# Save without the DataFrame index: the raw file already carries an
# 'Unnamed: 0' column, and writing the index as well would add one more
# junk unnamed column to the cleaned file when it is read back.
data.to_csv(file_path + 'Fall_2025_Survey.csv', index=False)

## Load The Data

In [None]:
# Read back the cleaned survey file and preview the first rows.
clean_survey_csv = file_path + 'Fall_2025_Survey.csv'
data = pd.read_csv(clean_survey_csv)
data.head()

## Birthday

In [None]:
data = pd.read_csv(file_path + 'Fall_2025_Survey.csv')

# Create the plot: one bar per birth year.
# The cleaning cell stores the year in 'When is your birthyear?'.
bins = np.arange(2000,2010,1)
# Edges sit half a year either side of each tick so every bar is centered on
# its year; there is one more edge than ticks so the last year (2009) gets a bar.
bin_edges = np.arange(bins[0], bins[-1] + 2) - 0.5
sns.histplot(data=data, x='When is your birthyear?', bins=bin_edges)
plt.title('Count of Birthdays by Year')
plt.xticks(bins)
plt.xlabel('Year')
plt.ylabel('Count');

## Favorite Color

In [None]:
# Tally how many respondents gave each favorite color.
favorite_color = data['What is your favorite color?']
favorite_color.value_counts()

## Hometown Distance

In [None]:
# Tally how often each hometown distance was reported.
hometown_distance = data['Approximately how many miles away from Pittsburgh is your hometown?']
hometown_distance.value_counts()

In [None]:
# Histogram of the reported hometown distances.
distance_col = 'Approximately how many miles away from Pittsburgh is your hometown?'
sns.histplot(data=data, x=distance_col);

## Primary Major

In [None]:
# Tally how many respondents listed each primary major.
primary_major = data['What is your (primary) major?']
primary_major.value_counts()

In [None]:
# Horizontal bar chart of majors, with bars in value_counts() order.
major_col = 'What is your (primary) major?'
order = data[major_col].value_counts().index
sns.countplot(y=major_col, data=data, order=order);

## Statistics

In [None]:
# Center one bar on each whole-number score (assumes integer ratings on a
# 0-10 scale - confirm against the survey form). With integer edges 0..10 the
# final bin is closed on both ends, so scores of 9 and 10 would share one bar.
score_ticks = np.arange(0, 11, 1)
score_edges = np.arange(0, 12, 1) - 0.5
sns.histplot(x='How much did you like your statistics class?', data=data, bins=score_edges)
plt.xticks(score_ticks);

## Statistics and Economic Data Analysis

In [None]:
# Scatter of statistics-class rating against excitement for this class.
stats_col = 'How much did you like your statistics class?'
excited_col = 'How excited are you for this class?'
sns.scatterplot(x=stats_col, y=excited_col, data=data)

In [None]:
# Jitter: add small Gaussian noise (sd 0.1) to both ratings so respondents
# with identical answers do not plot exactly on top of each other.
# A seeded generator keeps the noise, and so the figure, the same on every run.
rng = np.random.default_rng(0)
x_jitter = data['How much did you like your statistics class?'] + rng.normal(0, 0.1, len(data))
y_jitter = data['How excited are you for this class?'] + rng.normal(0, 0.1, len(data))

sns.scatterplot(x=x_jitter, y=y_jitter, alpha=0.7)
# The jittered Series are passed directly, so set the axis labels explicitly.
plt.xlabel('How much did you like your statistics class?')
plt.ylabel('How excited are you for this class?');

## Economic Data and Excel

In [None]:
# Count of each answer to the Excel-confidence question.
excel_col = 'Do you feel confident in Excel?'
sns.countplot(y=excel_col, data=data)

In [None]:
# Boxplot for summary: excitement for the class, split by Excel confidence
sns.boxplot(data=data, y='Do you feel confident in Excel?',
            x='How excited are you for this class?')

# Stripplot with manual x jitter: overlay the individual responses, nudged
# along the excitement axis so identical answers do not overlap.
# A seeded generator keeps the noise, and so the figure, the same on every run.
rng = np.random.default_rng(0)
x_jittered = data['How excited are you for this class?'] + rng.normal(0, 0.1, len(data))
sns.stripplot(y=data['Do you feel confident in Excel?'],
              x=x_jittered,
              color='firebrick',
              alpha=0.3, s=10);

## Economic Data and Python

In [None]:
# Histogram of self-reported Python confidence (pandas default binning).
python_confidence = data['How confident do you feel in python?']
python_confidence.hist()

In [None]:
# Jitter: add small Gaussian noise (sd 0.2) to both ratings so respondents
# with identical answers do not plot exactly on top of each other.
# A seeded generator keeps the noise, and so the figure, the same on every run.
rng = np.random.default_rng(0)
x_jitter = data['How confident do you feel in python?'] + rng.normal(0, 0.2, len(data))
y_jitter = data['How excited are you for this class?'] + rng.normal(0, 0.2, len(data))

sns.scatterplot(x=x_jitter, y=y_jitter, alpha=0.7)
# The jittered Series are passed directly, so set the axis labels explicitly.
plt.xlabel('How confident do you feel in python?')
plt.ylabel('How excited are you for this class?');