In [47]:
import matplotlib.pyplot as plt
import pandas as pd 
import seaborn as sns 
import numpy as np 
import scipy.stats 
import plotly.express as px
import plotly.figure_factory as ff
import sys
import os
import dotenv

In [3]:
# Make the project's `src` directory importable so the local `utils` module
# can be loaded. The path is derived from the current working directory, so
# this assumes the notebook is run from a folder that sits directly under the
# project root (e.g. `notebooks/`) -- TODO confirm.
current_dir = os.getcwd()
project_dir = os.path.abspath(os.path.join(current_dir, os.pardir))
src_path = os.path.join(project_dir, 'src')
# Guard the append so that re-running this cell does not keep adding
# duplicate entries to sys.path.
if src_path not in sys.path:
    sys.path.append(src_path)

# `load_env_vars` lives in the project's `utils` module (not shown here);
# presumably it populates os.environ with the settings read by later cells
# -- verify against src/utils.
from utils import load_env_vars
load_env_vars()

# Importing the Cleaned Dataset

In [4]:
# Location of the cleaned immigration CSV, taken from the environment.
us_immigration_final_csv = os.getenv('processed_us_immigration_csv')
if us_immigration_final_csv is None:
    # Fail with an actionable message instead of letting pandas raise an
    # opaque "Invalid file path or buffer object type: NoneType" error.
    raise ValueError(
        "Environment variable 'processed_us_immigration_csv' is not set; "
        "it must point to the processed US immigration CSV file."
    )

# Load the cleaned dataset that every later cell refers to as `df`.
df = pd.read_csv(us_immigration_final_csv)

In [30]:
# Preview the first rows of the loaded DataFrame (head() shows 5 rows by default).
df.head()

Unnamed: 0,year,lawful_permanent_resident_obt,refugee_arrivals,noncitizen_apprehensions,noncitizen_removals,noncitizen_returns,president,party,term
0,1980,524295,207116,910361,18013,719211,Jimmy Carter,Democratic,First
1,1981,595014,159252,975780,17379,823875,Ronald Reagan,Republican,First
2,1982,533624,98096,970246,15216,812572,Ronald Reagan,Republican,First
3,1983,550052,61218,1251357,19211,931600,Ronald Reagan,Republican,First
4,1984,541811,70393,1246981,18696,909833,Ronald Reagan,Republican,First


# Basic Exploration

In [10]:
# Print the column names, non-null counts, dtypes and memory usage of df.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 41 entries, 0 to 40
Data columns (total 9 columns):
 #   Column                         Non-Null Count  Dtype 
---  ------                         --------------  ----- 
 0   year                           41 non-null     int64 
 1   lawful_permanent_resident_obt  41 non-null     int64 
 2   refugee_arrivals               41 non-null     int64 
 3   noncitizen_apprehensions       41 non-null     int64 
 4   noncitizen_removals            41 non-null     int64 
 5   noncitizen_returns             41 non-null     int64 
 6   president                      41 non-null     object
 7   party                          41 non-null     object
 8   term                           41 non-null     object
dtypes: int64(6), object(3)
memory usage: 3.0+ KB


In [11]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

Unnamed: 0,year,lawful_permanent_resident_obt,refugee_arrivals,noncitizen_apprehensions,noncitizen_removals,noncitizen_returns
count,41.0,41.0,41.0,41.0,41.0,41.0
mean,2000.0,918537.0,75223.658537,1119217.0,182164.804878,862708.8
std,11.979149,281844.8,36474.413578,330125.4,146325.292532,477608.5
min,1980.0,524295.0,11840.0,596560.0,15216.0,100454.0
25%,1990.0,653206.0,56384.0,889212.0,33189.0,471798.0
50%,2000.0,973445.0,69920.0,1094719.0,183114.0,931600.0
75%,2010.0,1062040.0,85285.0,1291065.0,324303.0,1105829.0
max,2020.0,1826595.0,207116.0,1814729.0,432201.0,1675876.0


### Violin Plots (with Embedded Box Plots)

In [63]:
# Violin plot of the `lawful_permanent_resident_obt` column; box=True embeds a
# box plot inside the violin and points='all' overlays every observation.
lawful_permanent_resident_violin = px.violin(
    data_frame=df,
    y='lawful_permanent_resident_obt',
    box=True,
    points='all',
    title='Distribution of Lawful Permanent Residents Obtainees',
)
lawful_permanent_resident_violin.show()

In [62]:
# Violin plot of the `refugee_arrivals` column; box=True embeds a box plot
# inside the violin and points='all' overlays every observation.
# The title previously read "Arivals"; the spelling is corrected here.
refugee_arrivals_violin = px.violin(
    data_frame=df,
    y='refugee_arrivals',
    box=True,
    points='all',
    title='Distribution of Refugee Arrivals',
)
refugee_arrivals_violin.show()

In [61]:
# Violin plot of the `noncitizen_apprehensions` column; box=True embeds a box
# plot inside the violin and points='all' overlays every observation.
noncitizen_apprehensions_violin = px.violin(
    data_frame=df,
    y='noncitizen_apprehensions',
    box=True,
    points='all',
    title='Distribution of Noncitizen Apprehensions',
)
noncitizen_apprehensions_violin.show()

In [59]:
# Violin plot of the `noncitizen_removals` column; box=True embeds a box plot
# inside the violin and points='all' overlays every observation.
noncitizen_removals_violin = px.violin(
    data_frame=df,
    y='noncitizen_removals',
    box=True,
    points='all',
    title='Distribution of Noncitizen Removals',
)
noncitizen_removals_violin.show()

In [57]:
# Violin plot of the `noncitizen_returns` column; box=True embeds a box plot
# inside the violin and points='all' overlays every observation.
noncitizen_returns_violin = px.violin(
    data_frame=df,
    y='noncitizen_returns',
    box=True,
    points='all',
    title='Distribution of Noncitizen Returns',
)
noncitizen_returns_violin.show()

### Line Plots

In [23]:
# Line chart of the `refugee_arrivals` column plotted against `year`.
refugee_arrivals_line = px.line(
    data_frame=df,
    x='year',
    y='refugee_arrivals',
    title='US Refugee Arrivals By Year',
)
refugee_arrivals_line.show()

In [24]:
# Line chart of the `noncitizen_apprehensions` column plotted against `year`.
# The title now spells "Noncitizen" as one word, matching the other
# noncitizen figures in this notebook (it previously read "Non Citizen").
noncitizen_apprehensions_line = px.line(
    data_frame=df,
    x='year',
    y='noncitizen_apprehensions',
    title='US Noncitizen Apprehensions By Year',
)
noncitizen_apprehensions_line.show()

In [25]:
# Line chart of the `noncitizen_removals` column plotted against `year`.
noncitizen_removals_line = px.line(
    data_frame=df,
    x='year',
    y='noncitizen_removals',
    title='US Noncitizen Removals By Year',
)
noncitizen_removals_line.show()

In [27]:
# Line chart of the `noncitizen_returns` column plotted against `year`.
noncitizen_returns_line = px.line(
    data_frame=df,
    x='year',
    y='noncitizen_returns',
    title='US Noncitizen Returns By Year',
)
noncitizen_returns_line.show()