<a href="https://colab.research.google.com/github/natatsypora/figure_friday/blob/main/FigureFriday_38.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import plotly.graph_objects as go

import pandas as pd
import numpy as np

## Clone the Repository from GitHub

In [None]:
# Clone the BloombergGraphics H-1B immigration data repository into the
# current working directory (shell command run through the notebook's `!` escape).
!git clone https://github.com/BloombergGraphics/2024-h1b-immigration-data.git

In [4]:
# Load the FY2021 file from the cloned repository; the CSV is read directly
# from the .zip archive. low_memory=False makes pandas parse the file in one
# pass instead of in chunks.
fy2021_archive = "/content/2024-h1b-immigration-data/TRK_13139_FY2021.zip"
df = pd.read_csv(fy2021_archive, low_memory=False)

# Preview the first three rows
df.head(3)

Unnamed: 0,bcn,country_of_birth,country_of_nationality,ben_date_of_birth,ben_year_of_birth,gender,employer_name,FEIN,mail_addr,city,...,S1Q1B,BEN_EDUCATION_CODE,ED_LEVEL_DEFINITION,BEN_PFIELD_OF_STUDY,BEN_COMP_PAID,DOT_CODE,NAICS_CODE,S3Q1,S4Q1,T_U_VAWA_FLAG
0,(b)(6),CHN,CHN,(b)(6),1981,male,D&R I.P. Law Firm,453745389,108 N Ynez Ave,Monterey Park,...,,,,,,,,,,
1,(b)(6),IND,IND,(b)(6),1994,male,ITTECHNICA INC,824530582,1825 W Walnut Hill Ln,Irving,...,,,,,,,,,,
2,(b)(6),CAN,CAN,(b)(6),1988,male,"Tesla, Inc.",912197729,3500 Deer Creek Rd,Palo Alto,...,,G,MASTER'S DEGREE,COMPUTER ENGINEERING,125000.0,7.0,336111.0,B,N,


In [5]:
# Frequency of each distinct value in the `bcn` column
bcn_counts = df['bcn'].value_counts()
bcn_counts

Unnamed: 0_level_0,count
bcn,Unnamed: 1_level_1
(b)(6),269377
(b)(3) (b)(6) (b)(7)(c),47


In [6]:
# Exclude the rows whose `bcn` holds the combined "(b)(3) (b)(6) (b)(7)(c)"
# marker; .copy() gives an independent frame rather than a view of the original.
is_combined_marker = df['bcn'].eq('(b)(3) (b)(6) (b)(7)(c)')
df = df.loc[~is_combined_marker].copy()
df.shape

(269377, 56)

In [9]:
# Registration status: "SELECTED" means the registration was selected in the
# lottery. dropna=False also counts missing values.
df['status_type'].value_counts(dropna=False)

Unnamed: 0_level_0,count
status_type,Unnamed: 1_level_1
CREATED,145009
SELECTED,124368


In [10]:
# First decision recorded for the petition; dropna=False keeps the rows
# without any decision in the tally.
df['FIRST_DECISION'].value_counts(dropna=False)

Unnamed: 0_level_0,count
FIRST_DECISION,Unnamed: 1_level_1
,169512
Approved,96084
Denied,3781


In [11]:
# Number of petitions whose first decision is Approved or Denied, computed
# from the data instead of hand-copying the two counts, so the figure stays
# correct if the input file or the filtering changes.
decided_total = int(df['FIRST_DECISION'].isin(['Approved', 'Denied']).sum())
f'Approved + Denied = {decided_total:,.0f}'

'Approved + Denied = 99,865'

In [12]:
# RECEIPT_NUMBER is the unique number assigned to the petition; missing
# values are left out of the tally (value_counts default).
df['RECEIPT_NUMBER'].value_counts()

Unnamed: 0_level_0,count
RECEIPT_NUMBER,Unnamed: 1_level_1
(b)(6),99984


In [53]:
# Keep only the rows that have a receipt number, and only the two columns
# needed for the charts (employer name on the petition and gender).
has_receipt = df['RECEIPT_NUMBER'].notna()
df_unique_petition = df.loc[has_receipt, ['i129_employer_name', 'gender']]
df_unique_petition.shape

(99984, 2)

In [57]:
# Data for plot: count each gender once and derive the shares from the counts,
# so labels, values and text are guaranteed to share the same order.
text_pie = df_unique_petition['gender'].value_counts()
df_for_pie = text_pie / text_pie.sum()
text_annot = df_unique_petition.shape[0]

# Fixed colour per gender label, so the assignment does not depend on which
# gender happens to be more frequent; unexpected labels fall back to grey.
gender_colors = {'male': 'rgba(31, 119, 180, 0.2)',
                 'female': 'rgba(31, 119, 180, 1.0)'}
slice_colors = [gender_colors.get(str(label).lower(), 'rgba(128, 128, 128, 0.5)')
                for label in df_for_pie.index]

# Create the figure with pie chart (hole = 0.7).
# texttemplate fully controls the slice text, so no textinfo is needed.
fig = go.Figure(data=[go.Pie(labels=[str(el).title() for el in df_for_pie.index],
                             values=df_for_pie.values, textfont_size=14,
                             text=text_pie.values, hoverinfo='skip',
                             textposition='outside',
                             texttemplate='<b>%{label}</b> <br><b>%{percent:0.0%}</b><br>(%{text:,.0f})',
                             marker_colors=slice_colors,
                             marker_line_color='rgb(8, 48, 107)',
                             marker_line_width=1,
                             hole=0.7)])

# Add text to center of the pie (paper coordinates 0.5/0.5 = middle of the hole)
fig.add_annotation(x=0.5, y=0.5,
                   text=f'Total <br>Number of Receipts <br><b>{text_annot:,.0f}</b>',
                   font_size=14,
                   showarrow=False)

# Define layout properties
fig.update_layout(paper_bgcolor='rgba(31, 119, 180, 0.05)', plot_bgcolor='rgba(0, 0, 0, 0)',
                  width=400, height=300,
                  margin=dict(l=50, r=50, b=20, t=70),
                  title='<b>Gender Proportions by Receipt Number</b>',
                  title_x=0.5, font_size=12,
                  showlegend=False)

fig.show()

## Bar Chart: Gender Breakdown of the Top 10 Employers

In [58]:
# Get the top 10 employer names (by number of rows in df_unique_petition)
top_10_employer_name = df_unique_petition['i129_employer_name'].value_counts().nlargest(10).index

# Filter the dataframe for the top 10 employers
df_up_filtered = df_unique_petition[df_unique_petition['i129_employer_name'].isin(top_10_employer_name)]

# Create a crosstab of employer names and gender: row-wise shares,
# ordered by the female share (ascending)
crosstab_result = pd.crosstab(df_up_filtered['i129_employer_name'],
                              df_up_filtered['gender'],
                              normalize='index').sort_values(by='female')

# Define customdata for hoverinfo: raw counts per employer and gender.
# pd.crosstab returns rows in alphabetical order, so re-order them to match
# crosstab_result; otherwise the hover counts are attached to the wrong bars.
values = pd.crosstab(df_up_filtered['i129_employer_name'],
                     df_up_filtered['gender']).reindex(crosstab_result.index)

# Define colors for bar
marker_colors = ['rgba(31, 119, 180, 0.2)', 'rgba(31, 119, 180, 1.0)']

# Get first word of employer name to use as a short axis label
y = [name.split()[0] for name in crosstab_result.index]
# Two employers sharing a first word would collapse into a single category on
# the axis; fall back to the full names in that case.
if len(set(y)) != len(y):
    y = list(crosstab_result.index)

In [60]:
# Create the figure
fig2 = go.Figure()

# Add bars to the figure
for col, m_c in zip(crosstab_result.columns[::-1], marker_colors):
    fig2.add_trace(go.Bar(
                    x=crosstab_result[col],
                    y=y,
                    orientation='h',
                    name=col.title(),
                    width=0.5,
                    customdata=values[col].tolist(),
                    text=crosstab_result[col], texttemplate='%{text:0.0%}',
                    hovertemplate='%{y}<br>%{text:.1%} (%{customdata:,.0f})<extra></extra>',
                    marker_color=m_c,
                    marker_line_color='rgb(8, 48, 107)',
                    marker_line_width=1)
                )

# Layout Configuration
fig2.update_layout(
    paper_bgcolor='rgba(31, 119, 180, 0.05)', plot_bgcolor='rgba(0, 0, 0, 0)',
    title=f'<b>Gender Breakdown of Top 10 Employer Name <br><sub>H-1B Visa Lottery 2021<b>',
    template='simple_white', font_size=12,
    barmode='group', bargap=0.5,
    margin=dict(t=100, b=20, l=100, r=20),
    width=500, height=550,
    xaxis_visible=False,
    yaxis=dict(linecolor='rgba(0, 0, 0, 0)',
               tickfont_color='rgb(8, 48, 107)',
               tickfont_weight='bold'),
    # define legend properties
    legend=(dict(orientation='h', bgcolor='rgba(0, 0, 0, 0)',
                 traceorder='reversed',
                 x=0.7, y=1.05,
                 xanchor='center',
                 yanchor='middle'))
                )
fig2.show()