In [10]:
# Import necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Read the exercise workbook into a DataFrame
data = pd.read_excel('../data/Exercise #2.xlsx')

# Normalise the header labels to snake_case, one transformation per step
column_labels = data.columns.str.strip()                                  # drop surrounding whitespace
column_labels = column_labels.str.replace(r'\s+', '_', regex=True)        # whitespace runs -> one underscore
column_labels = column_labels.str.lower()                                 # lowercase everything
column_labels = column_labels.str.replace(r'[^\w]', '_', regex=True)      # any non-word character -> underscore
column_labels = column_labels.str.replace(r'_{2,}', '_', regex=True)      # collapse repeated underscores
column_labels = column_labels.str.strip('_')                              # no leading/trailing underscores
data.columns = column_labels

print(data.columns)

# Preview the first rows of the cleaned frame
data.head()

Index(['mirc_subpractice', 'attorney_on_case', 'client_birthdate',
       'client_country_of_origin', 'client_language', 'client_city',
       'immigration_court_location', 'event_type', 'event_date',
       'application_petition_type',
       'application_petition_outcome_date_if_applicable',
       'application_petition_outcome', 'court_hearing_is_remote_or_in_person',
       'reason_for_application_petition_denial_provided_by_uscis',
       'reason_for_case_representation_ended_if_applicable'],
      dtype='object')


Unnamed: 0,mirc_subpractice,attorney_on_case,client_birthdate,client_country_of_origin,client_language,client_city,immigration_court_location,event_type,event_date,application_petition_type,application_petition_outcome_date_if_applicable,application_petition_outcome,court_hearing_is_remote_or_in_person,reason_for_application_petition_denial_provided_by_uscis,reason_for_case_representation_ended_if_applicable
0,Released,BB,2008-02-10,Afghanistan,,Grand Rapids,Detroit,Application/petition filed,2022-09-13 00:00:00,I-485,2022-09-21,Denied,,Fee not included,
1,Released,BB,NaT,Afghanistan,,Kalamazoo,Detroit,Application/petition filed,2022-11-12 00:00:00,I-360,2022-11-23,Denied,,Request for evidence not responded to,
2,Released,DD,2008-03-29,Afghanistan,Pashto,Ypsilanti,Detroit,Application/petition filed,2022-12-16 00:00:00,State Court Petition,NaT,Pending,,,Immigration Relief Granted
3,Released,CC,2005-06-12,Afghanistan,,Traverse City,Detroit,Application/petition filed,2022-09-22 00:00:00,State Court Petition,2022-10-01,Granted,,,
4,Released,CC,2004-03-15,Afghanistan,,Traverse City,Detroit,Application/petition filed,2022-12-22 00:00:00,State Court Petition,NaT,Pending,,,


In [13]:

# Convert date columns to datetime; entries that cannot be parsed become NaT
data['event_date'] = pd.to_datetime(data['event_date'], errors='coerce')
data['application_petition_outcome_date_if_applicable'] = pd.to_datetime(
    data['application_petition_outcome_date_if_applicable'], errors='coerce'
)

# Take ONE "as of" timestamp for the whole cell, aligned to midnight.
# Calling pd.Timestamp.now() separately for each fill yields two different
# instants that both carry a time-of-day component, so whole-day differences
# against midnight-based dates floor inconsistently (off by one for negative
# deltas) and filled event dates end up with a stray wall-clock time.
as_of = pd.Timestamp.now().normalize()

# Fill missing event_date with today's date
# NOTE(review): this substitutes today's date for an unknown event date, so
# processing_time for those rows is not a real duration -- confirm intended.
data['event_date'] = data['event_date'].fillna(as_of)

# Fill missing application_petition_outcome with 'Pending'
data['application_petition_outcome'] = data['application_petition_outcome'].fillna('Pending')

# Processing time in whole days: outcome date (or as_of when there is no
# outcome date yet) minus event date
data['processing_time'] = (
    data['application_petition_outcome_date_if_applicable'].fillna(as_of)
    - data['event_date']
).dt.days

# Columns needed for the location / outcome / denial-reason analysis
relevant_columns = [
    'client_city',
    'application_petition_outcome',
    'reason_for_application_petition_denial_provided_by_uscis',
]

# Keep only those columns and discard rows lacking a city or an outcome
data_relevant = data[relevant_columns].dropna(
    subset=['client_city', 'application_petition_outcome']
)

# Case counts: one row per city, one column per outcome
location_outcome_counts = (
    data_relevant
    .groupby(['client_city', 'application_petition_outcome'])
    .size()
    .unstack(fill_value=0)
)

# Denied cases only, then denial-reason counts: one row per city, one column per reason
denial_reasons = data_relevant.loc[data_relevant['application_petition_outcome'].eq('Denied')]
denial_reasons_summary = (
    denial_reasons
    .groupby('client_city')['reason_for_application_petition_denial_provided_by_uscis']
    .value_counts()
    .unstack(fill_value=0)
)

# Show both tables, each under its heading
print("Case Outcomes by Location:", location_outcome_counts, sep="\n")
print("\nDenial Reasons by Location:", denial_reasons_summary, sep="\n")

# Additional: Summarize key findings
def _print_city_row(table, city, heading, missing_message):
    """Print one city's row of ``table`` under ``heading``.

    Parameters:
        table: DataFrame indexed by city.
        city: index label to look up.
        heading: text printed before the row when the city is present.
        missing_message: text printed instead when the city is absent.

    Returns:
        The row that was printed, or None when ``city`` is not in
        ``table.index``.
    """
    if city not in table.index:
        print(missing_message)
        return None
    city_row = table.loc[city, :]
    print(heading)
    print(city_row)
    return city_row


print("\nSummary:")

# Outcome counts per city; the results stay bound to their original names
# (None when the city has no rows) in case later cells refer to them.
detroit_outcome_counts = _print_city_row(
    location_outcome_counts, 'Detroit',
    "\nDetroit Case Outcomes:", "\nNo data for Detroit.",
)
grand_rapids_outcome_counts = _print_city_row(
    location_outcome_counts, 'Grand Rapids',
    "\nGrand Rapids Case Outcomes:", "\nNo data for Grand Rapids.",
)

# Denial reasons per city, printed in the same city order as above
for city in ('Detroit', 'Grand Rapids'):
    _print_city_row(
        denial_reasons_summary, city,
        f"\nDenial Reasons for {city}:", f"\nNo denial reasons for {city}.",
    )

Case Outcomes by Location:
application_petition_outcome  Denied  Granted  Pending  Withdrawn
client_city                                                      
Detroit                            4        4       12          0
Grand Rapids                      10       14       59          2
Kalamazoo                          1        1       12          1
Lansing                            0        0       10          0
Traverse City                      1        2        5          0
Ypsilanti                          3        4       21          0

Denial Reasons by Location:
reason_for_application_petition_denial_provided_by_uscis  Fee not included  \
client_city                                                                  
Detroit                                                                  1   
Grand Rapids                                                             4   
Kalamazoo                                                                0   
Traverse City             