## Import

In [2]:
import dash
from dash import dcc
from dash import html
import plotly.express as px
import pandas as pd
from dash.dependencies import Input, Output

In [9]:
# Import packages
import pandas as pd

### DATA IMPORTING AND TREATMENT

# Specify the path to CSV file
file_path = 'https://raw.githubusercontent.com/mafaldamartins1/ProjectDV/main/dataset_US_executions.csv'

# Read the CSV file into a pandas dataframe
df = pd.read_csv(file_path)

# Remove space in the beginning of ' Male' from the 'Sex' column
df['Sex'] = df['Sex'].replace(' Male','Male')

# Join 'no' values with 'No' in the 'Foreign National' column
df['Foreign National'] = df['Foreign National'].replace('no','No')

# Change the type of the 'Execution Date' column to Date
df['Execution Date'] = pd.to_datetime(df['Execution Date'])
df['Execution Year'] = df['Execution Date'].dt.year

# Join both 'White' values of column 'Race' together
df.loc[df['Race'].str.startswith('White'), 'Race'] = 'White'
# Join both 'South' values of column 'Region' together
df.loc[df['Region'].str.startswith('South'), 'Region'] = 'South'

# Remove space in the end of 'Oklahoma ' from the 'State' column
df['State'] = df['State'].replace('Oklahoma ','Oklahoma')

# Join 'Multiple' and 'Multiple (including White)' in the 'Victim(s) Race(s)' column
df['Victim(s) Race(s)'] = df['Victim(s) Race(s)'].replace('Multiple (including White)','Multiple')

# Remove columns with missing values - 'Middle Name(s)' and 'Suffix'
df.drop(columns=["Middle Name(s)", "Suffix"])

# Create new columns for the number of victims per race
df['Number of White Victims'] = df['Number of White Male Victims'] + df['Number of White Female Victims']
df['Number of Black Victims'] = df['Number of Black Male Victims'] + df['Number of Black Female Victims']
df['Number of Latino Victims'] = df['Number of Latino Male Victims'] + df['Number of Latino Female Victims']
df['Number of Asian Victims'] = df['Number of Asian Male Victims'] + df['Number of Asian Female Victims']
df['Number of Native American Victims'] = df['Number of Native American Male Victims'] + df['Number of American Indian or Alaska Native Female Victims']
df['Number of Other Race Victims'] = df['Number of Other Race Male Victims'] + df['Number of Other Race Female Victims']

# drop das outras?

state_codes = {
    'Alabama': 'AL',
    'Alaska': 'AK',
    'Arizona': 'AZ',
    'Arkansas': 'AR',
    'California': 'CA',
    'Colorado': 'CO',
    'Connecticut': 'CT',
    'Delaware': 'DE',
    'Florida': 'FL',
    'Georgia': 'GA',
    'Hawaii': 'HI',
    'Idaho': 'ID',
    'Illinois': 'IL',
    'Indiana': 'IN',
    'Iowa': 'IA',
    'Kansas': 'KS',
    'Kentucky': 'KY',
    'Louisiana': 'LA',
    'Maine': 'ME',
    'Maryland': 'MD',
    'Massachusetts': 'MA',
    'Michigan': 'MI',
    'Minnesota': 'MN',
    'Mississippi': 'MS',
    'Missouri': 'MO',
    'Montana': 'MT',
    'Nebraska': 'NE',
    'Nevada': 'NV',
    'New Hampshire': 'NH',
    'New Jersey': 'NJ',
    'New Mexico': 'NM',
    'New York': 'NY',
    'North Carolina': 'NC',
    'North Dakota': 'ND',
    'Ohio': 'OH',
    'Oklahoma': 'OK',
    'Oregon': 'OR',
    'Pennsylvania': 'PA',
    'Rhode Island': 'RI',
    'South Carolina': 'SC',
    'South Dakota': 'SD',
    'Tennessee': 'TN',
    'Texas': 'TX',
    'Utah': 'UT',
    'Vermont': 'VT',
    'Virginia': 'VA',
    'Washington': 'WA',
    'West Virginia': 'WV',
    'Wisconsin': 'WI',
    'Wyoming': 'WY'
}
df['Race'] = df['Race'].replace('American Indian or Alaska Native','Native American')

# Create a new column in your dataframe that maps state names to state codes
df['State Code'] = df['State'].map(state_codes)

print(df.columns)

print(df['Victim(s) Race(s)'].unique())

Index(['Execution Volunteer', 'Number of Victims', 'Juvenile', 'First Name',
       'Last Name', 'Middle Name(s)', 'Suffix', 'Race', 'Sex', 'Region',
       'Country', 'State', 'Foreign National', 'Execution Date',
       'Number of White Male Victims', 'Number of Black Male Victims',
       'Number of Latino Male Victims', 'Number of Asian Male Victims',
       'Number of Native American Male Victims',
       'Number of Other Race Male Victims', 'Number of White Female Victims',
       'Number of Black Female Victims', 'Number of Latino Female Victims',
       'Number of Asian Female Victims',
       'Number of American Indian or Alaska Native Female Victims',
       'Number of Other Race Female Victims', 'Victim(s) Race(s)',
       'Execution Year', 'Number of White Victims', 'Number of Black Victims',
       'Number of Latino Victims', 'Number of Asian Victims',
       'Number of Native American Victims', 'Number of Other Race Victims',
       'State Code'],
      dtype='object')
['

# Visualization

In [85]:
pivot_df = pd.pivot_table(df, values='ID', index=['Race'], columns=['Region'], aggfunc='count').sort_values("South", ascending = False)

In [77]:
pivot_df

Region,Midwest,Northeast,South,West
Race,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Asian,,,5.0,2.0
Black,65.0,,460.0,7.0
Latinx,,,124.0,6.0
Native American,1.0,,13.0,6.0
Other Race,,,3.0,
White,129.0,4.0,668.0,68.0


In [86]:
pivot_df = pd.pivot_table(df, values='ID', index=['Race'], columns=['Region'], aggfunc='count').sort_values("South", ascending = False)
fig = px.bar(pivot_df, x= pivot_df.index, y = ["South", "West", "Midwest", "Northeast"])

In [59]:
fig = px.bar(df, x= "Race", y = df.index, color = "Region")
fig.show()

In [60]:
counts = df.index.value_counts()

# Create a bar chart with the counts
fig = px.bar(x=counts.index, y=counts.values, color="Region", labels={"x":"Race", "y":"Count"})
fig.show()

In [53]:
fig = px.bar(df, x= "Race", y = df.index, color = "Region")
fig.show()