In [None]:
# Importing required libraries

import pandas as pd
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import seaborn as sns
import plotly.express as px
import numpy as np
import json
import folium

## Data Collection:

In [None]:
# Load the raw disclosure workbooks: one DataFrame per fiscal year
# (FY2012-FY2020) plus the FY2020 worksites workbook.
# NOTE(review): the FY2020 disclosure path begins with './Data Set/' while
# every other path begins with './Disclosure Files/' -- confirm the layout.

(
    df_2012,
    df_2013,
    df_2014,
    df_2015,
    df_2016,
    df_2017,
    df_2018,
    df_2019,
    df_2020,
    df_2020_ws,
) = [
    pd.read_excel(workbook_path)
    for workbook_path in (
        './Disclosure Files/PW_FY2012.xlsx',
        './Disclosure Files/PW_FY2013.xlsx',
        './Disclosure Files/PW_Case_Data_FY2014.xlsx',
        './Disclosure Files/PW_Disclosure_Data_FY15_Q4.xlsx',
        './Disclosure Files/PW_Disclosure_Data_FY16.xlsx',
        './Disclosure Files/PW_Disclosure_Data_FY17.xlsx',
        './Disclosure Files/PWD_Disclosure_Data_FY2018_EOY.xlsx',
        './Disclosure Files/PW_Disclosure_Data_FY2019.xlsx',
        './Data Set/Disclosure Files/PW_Disclosure_Data_FY2020.xlsx',
        './Disclosure Files/PW_Worksites_FY2020.xlsx',
    )
]

In [None]:
# Keep, for every fiscal year, the 13 columns that exist in all years.
# Header spellings differ from file to file, so each year lists its own names;
# the ORDER is the same everywhere because the columns are relabelled by
# position in a later cell.

df_2012 = df_2012[[
    'CASE_NUMBER', 'PW_DETERMINATION_DATE', 'VISA_CLASS', 'CASE_STATUS',
    'EMPLYER_LEGAL_BUSINESS_NAME', 'EMPLOYER_CITY', 'EMPLOYER_STATE',
    'PRIMARY_EDUCATION_LEVEL', 'EMP_EXPERIENCE_MONTHS', 'PRIMARY_WORKSITE_CITY',
    'PRIMARY_WORKSITE_STATE', 'PWD_WAGE_RATE', 'PWD_SOC_CODE_TITLE',
]]
df_2013 = df_2013[[
    'CASE_NUMBER', 'PW_DETERM_DATE', 'VISA_CLASS', 'CASE_STATUS',
    'EMPLOYER_LEGAL_BUSINESS_NAME', 'EMPLOYER_CITY', 'EMPLOYER_STATE',
    'PRIMARY_EDUCATION_LEVEL', 'EMP_EXPERIENCE_MONTHS', 'PRIMARY_WORKSITE_CITY',
    'PRIMARY_WORKSITE_STATE', 'PWD_WAGE_RATE', 'PWD_SOC_TITLE',
]]
df_2014 = df_2014[[
    'CASE_NUMBER', 'PW_DETERMINATION_DATE', 'VISA_CLASS', 'CASE_STATUS',
    'EMPLOYER_LEGAL_BUSINESS_NAME', 'EMPLOYER CITY', 'EMPLOYER STATE',
    'PRIMARY_EDUCATION_LEVEL', 'EMP_EXPERIENCE_MONTHS', 'PRIMARY_WORKSITE_CITY',
    'PRIMARY_WORKSITE_STATE', 'PWD_WAGE_RATE', 'PWD_SOC_TITLE',
]]
df_2015 = df_2015[[
    'CASE_NUMBER', 'DETERMINATION_ISSUED', 'VISA_CLASS', 'STATUS',
    'BUSINESS_NAME', 'EMPLOYER CITY', 'EMPLOYER STATE',
    'EDUCATION_LEVEL', 'EMP_EXP_NUM_MONTHS', 'WORKSITE_CITY',
    'WORKSITE_STATE', 'PREVAIL_WAGE', 'JOB_TITLE',
]]
df_2016 = df_2016[[
    'CASE_NUMBER', 'DETERMINATION_DATE', 'VISA_CLASS', 'CASE_STATUS',
    'BUSINESS_NAME', 'EMPLOYER_CITY', 'EMPLOYER_STATE',
    'PRIMARY_EDUCATION_LEVEL', 'EMP_EXPERIENCE_MONTHS', 'PRIMARY_WORKSITE_CITY',
    'PRIMARY_WORKSITE_STATE', 'PWD_WAGE_RATE', 'PWD_SOC_TITLE',
]]
df_2017 = df_2017[[
    'CASE_NUMBER', 'DETERMINATION_DATE', 'VISA_CLASS', 'CASE_STATUS',
    'BUSINESS_NAME', 'EMPLOYER_CITY', 'EMPLOYER_STATE',
    'PRIMARY_EDUCATION_LEVEL', 'EMP_EXPERIENCE_MONTHS', 'PRIMARY_WORKSITE_CITY',
    'PRIMARY_WORKSITE_STATE', 'PWD_WAGE_RATE', 'PWD_SOC_TITLE',
]]
df_2018 = df_2018[[
    'CASE_NUMBER', 'DETERMINATION_DATE', 'VISA_CLASS', 'CASE_STATUS',
    'BUSINESS_NAME', 'EMPLOYER _CITY', 'EMPLOYER _STATE',
    'PRIMARY_EDUCATION_LEVEL', 'EMP_EXPERIENCE_MONTHS', 'PRIMARY_WORKSITE_CITY',
    'PRIMARY_WORKSITE_STATE', 'PWD_WAGE_RATE', 'PWD_SOC_TITLE',
]]
df_2019 = df_2019[[
    'CASE_NUMBER', 'DETERMINATION_DATE', 'VISA_CLASS', 'CASE_STATUS',
    'BUSINESS_NAME', 'EMPLOYER_CITY', 'EMPLOYER_STATE',
    'PRIMARY_EDUCATION_LEVEL', 'EMP_EXPERIENCE_MONTHS', 'PRIMARY_WORKSITE_CITY',
    'PRIMARY_WORKSITE_STATE', 'PWD_WAGE_RATE', 'PWD_SOC_TITLE',
]]
df_2020 = df_2020[[
    'CASE_NUMBER', 'DETERMINATION_DATE', 'VISA_CLASS', 'CASE_STATUS',
    'EMPLOYER_LEGAL_BUSINESS_NAME', 'EMPLOYER_CITY', 'EMPLOYER_STATE',
    'PRIMARY_EDUCATION_LEVEL', 'EMP_EXPERIENCE_MONTHS', 'PRIMARY_WORKSITE_CITY',
    'PRIMARY_WORKSITE_STATE', 'PWD_WAGE_RATE', 'PWD_SOC_TITLE',
]]

In [None]:
# Unified column names applied to every yearly frame.
# The order matches the per-year column selections, which are relabelled
# positionally.

final_col_names = [
    'CASE_NUMBER',
    'DETERMINATION_DATE',
    'VISA_CLASS',
    'CASE_STATUS',
    'EMPLYER_LEGAL_BUSINESS_NAME',
    'EMPLOYER_CITY',
    'EMPLOYER_STATE',
    'PRIMARY_EDUCATION_LEVEL',
    'EMP_EXPERIENCE_MONTHS',
    'PRIMARY_WORKSITE_CITY',
    'PRIMARY_WORKSITE_STATE',
    'WAGE_RATE',
    'JOB_TITLE',
]

In [None]:
# Relabel the columns of each yearly frame with the unified names.
# Each frame is rebuilt from its raw `.values` array, so the names are applied
# purely by column position.

(
    df_2012, df_2013, df_2014,
    df_2015, df_2016, df_2017,
    df_2018, df_2019, df_2020,
) = (
    pd.DataFrame(yearly_frame.values, columns=final_col_names)
    for yearly_frame in (
        df_2012, df_2013, df_2014,
        df_2015, df_2016, df_2017,
        df_2018, df_2019, df_2020,
    )
)

In [None]:
# Stack the nine yearly frames (2012 through 2020) into one data set.

df = pd.concat(
    objs=[
        df_2012, df_2013, df_2014,
        df_2015, df_2016, df_2017,
        df_2018, df_2019, df_2020,
    ]
)

In [None]:
# Persist the merged data set as CSV; the row index is not written.

df.to_csv(path_or_buf='final_df.csv', index=False)

In [None]:
# Reload the merged data set from the CSV written by the previous cell.

df = pd.read_csv(filepath_or_buffer='final_df.csv')

In [None]:
# Preview the first five rows of the merged data set.

df.head(n=5)

## Data Preprocessing and Cleaning:

In [None]:
# Discard every row that lacks a value in any of the columns listed below.
df = df.dropna(
    subset=[
        'DETERMINATION_DATE',
        'EMPLYER_LEGAL_BUSINESS_NAME',
        'EMPLOYER_CITY',
        'EMPLOYER_STATE',
        'PRIMARY_EDUCATION_LEVEL',
        'PRIMARY_WORKSITE_CITY',
        'PRIMARY_WORKSITE_STATE',
        'WAGE_RATE',
        'JOB_TITLE',
    ],
)

In [None]:
# Remove fully duplicated rows, keeping the first occurrence of each.
df = df.drop_duplicates(keep='first')

In [None]:
# Treat a missing experience requirement as zero months.
# `Series.fillna` returns a new Series rather than modifying `df`, so the
# result must be assigned back to the column for the fill to take effect.
df['EMP_EXPERIENCE_MONTHS'] = df['EMP_EXPERIENCE_MONTHS'].fillna(0)

In [None]:
# Normalise the capitalisation of case statuses to Title Case.
df = df.assign(CASE_STATUS=df['CASE_STATUS'].str.title())

In [None]:
# Collapse the hyphenated spellings of the two "Center Director Review"
# statuses onto their un-hyphenated forms; every other value is left as is.
_status_spelling_fixes = {
    'Center Director Review - Modified Determination': 'Center Director Review Modified Determination',
    'Center Director Review - Affirmed Determination': 'Center Director Review Affirmed Determination',
}
df['CASE_STATUS'] = df['CASE_STATUS'].apply(
    lambda status: _status_spelling_fixes.get(status, status)
)

In [None]:
# Lookup table that normalises the raw state values found in the data
# (two-letter codes and upper-case full names) to one display name each.
#
# Fixes relative to the previous table:
#   * 'MS' pointed at 'Missouri'; it is the code for Mississippi.
#   * 'GU' and 'PW' mapped to themselves although 'GUAM' and 'PALAU' map to
#     'Guam' and 'Palau'; the codes now resolve to the same names.
# 'FM' has no full-name counterpart in this table and is passed through as is.
states = {
    # --- two-letter codes ---
    'AK': 'Alaska', 'AL': 'Alabama', 'AR': 'Arkansas', 'AZ': 'Arizona',
    'CA': 'California', 'CO': 'Colorado', 'CT': 'Connecticut',
    'DC': 'District of Columbia', 'DE': 'Delaware',
    'FL': 'Florida', 'FM': 'FM',
    'GA': 'Georgia', 'GU': 'Guam',
    'HI': 'Hawaii',
    'IA': 'Iowa', 'ID': 'Idaho', 'IL': 'Illinois', 'IN': 'Indiana',
    'KS': 'Kansas', 'KY': 'Kentucky',
    'LA': 'Louisiana',
    'MA': 'Massachusetts', 'MD': 'Maryland', 'ME': 'Maine', 'MI': 'Michigan',
    'MN': 'Minnesota', 'MO': 'Missouri', 'MP': 'NORTHERN MARIANA ISLANDS',
    'MS': 'Mississippi', 'MT': 'Montana',
    'NC': 'North Carolina', 'ND': 'North Dakota', 'NE': 'Nebraska',
    'NH': 'New Hampshire', 'NJ': 'New Jersey', 'NM': 'New Mexico',
    'NV': 'Nevada', 'NY': 'New York',
    'OH': 'Ohio', 'OK': 'Oklahoma', 'OR': 'Oregon',
    'PA': 'Pennsylvania', 'PR': 'Puerto Rico', 'PW': 'Palau',
    'RI': 'Rhode Island',
    'SC': 'South Carolina', 'SD': 'South Dakota',
    'TN': 'Tennessee', 'TX': 'Texas',
    'UT': 'Utah',
    'VA': 'Virginia', 'VI': 'Virgin Islands', 'VT': 'Vermont',
    'WA': 'Washington', 'WI': 'Wisconsin', 'WV': 'West Virginia', 'WY': 'Wyoming',
    # --- upper-case full names ---
    'ALABAMA': 'Alabama', 'ALASKA': 'Alaska', 'ARIZONA': 'Arizona',
    'ARKANSAS': 'Arkansas',
    'CALIFORNIA': 'California', 'COLORADO': 'Colorado',
    'CONNECTICUT': 'Connecticut',
    'DELAWARE': 'Delaware', 'DISTRICT OF COLUMBIA': 'District of Columbia',
    'FLORIDA': 'Florida',
    'GEORGIA': 'Georgia', 'GUAM': 'Guam',
    'HAWAII': 'Hawaii',
    'IDAHO': 'Idaho', 'ILLINOIS': 'Illinois', 'INDIANA': 'Indiana', 'IOWA': 'Iowa',
    'KANSAS': 'Kansas', 'KENTUCKY': 'Kentucky',
    'LOUISIANA': 'Louisiana',
    'MAINE': 'Maine', 'MARSHALL ISLANDS': 'MARSHALL ISLANDS',
    'MARYLAND': 'Maryland', 'MASSACHUSETTS': 'Massachusetts',
    'MICHIGAN': 'Michigan', 'MINNESOTA': 'Minnesota',
    'MISSISSIPPI': 'Mississippi', 'MISSOURI': 'Missouri', 'MONTANA': 'Montana',
    'NEBRASKA': 'Nebraska', 'NEVADA': 'Nevada', 'NEW HAMPSHIRE': 'New Hampshire',
    'NEW JERSEY': 'New Jersey', 'NEW MEXICO': 'New Mexico', 'NEW YORK': 'New York',
    'NORTH CAROLINA': 'North Carolina', 'NORTH DAKOTA': 'North Dakota',
    'NORTHERN MARIANA ISLANDS': 'NORTHERN MARIANA ISLANDS',
    'OHIO': 'Ohio', 'OKLAHOMA': 'Oklahoma', 'OREGON': 'Oregon',
    'PALAU': 'Palau', 'PENNSYLVANIA': 'Pennsylvania', 'PUERTO RICO': 'Puerto Rico',
    'RHODE ISLAND': 'Rhode Island',
    'SOUTH CAROLINA': 'South Carolina', 'SOUTH DAKOTA': 'South Dakota',
    'TENNESSEE': 'Tennessee', 'TEXAS': 'Texas',
    'UTAH': 'Utah',
    'VERMONT': 'Vermont', 'VIRGIN ISLANDS': 'Virgin Islands', 'VIRGINIA': 'Virginia',
    'WASHINGTON': 'Washington', 'WEST VIRGINIA': 'West Virginia',
    'WISCONSIN': 'Wisconsin', 'WYOMING': 'Wyoming',
}

In [None]:
# Replace each raw worksite state value with its normalised name from
# `states`. A value that is not a key of `states` raises KeyError.
df['PRIMARY_WORKSITE_STATE'] = df['PRIMARY_WORKSITE_STATE'].map(
    lambda raw_state: states[raw_state]
)

In [None]:
# Replace each raw employer state value with its normalised name from
# `states`. A value that is not a key of `states` raises KeyError.
df['EMPLOYER_STATE'] = df['EMPLOYER_STATE'].map(
    lambda raw_state: states[raw_state]
)

In [None]:
# Display name -> two-letter abbreviation, used to build `US_STATE_ABV`.
#
# The two upper-case territory names previously mapped to themselves, which
# put full names into an abbreviation column; they now map to their USPS
# codes ('MP' and 'MH') like every other entry.
usa_states = {
    'Alabama': 'AL', 'Alaska': 'AK', 'Arizona': 'AZ', 'Arkansas': 'AR',
    'California': 'CA', 'Canal Zone': 'CZ', 'Colorado': 'CO', 'Connecticut': 'CT',
    'Delaware': 'DE', 'District of Columbia': 'DC',
    'Florida': 'FL',
    'Georgia': 'GA', 'Guam': 'GU',
    'Hawaii': 'HI',
    'Idaho': 'ID', 'Illinois': 'IL', 'Indiana': 'IN', 'Iowa': 'IA',
    'Kansas': 'KS', 'Kentucky': 'KY',
    'Louisiana': 'LA',
    'Maine': 'ME', 'Maryland': 'MD', 'Massachusetts': 'MA', 'Michigan': 'MI',
    'Minnesota': 'MN', 'Mississippi': 'MS', 'Missouri': 'MO', 'Montana': 'MT',
    'Nebraska': 'NE', 'Nevada': 'NV', 'New Hampshire': 'NH', 'New Jersey': 'NJ',
    'New Mexico': 'NM', 'New York': 'NY', 'North Carolina': 'NC',
    'North Dakota': 'ND',
    'Ohio': 'OH', 'Oklahoma': 'OK', 'Oregon': 'OR',
    'Pennsylvania': 'PA', 'Puerto Rico': 'PR',
    'Rhode Island': 'RI',
    'South Carolina': 'SC', 'South Dakota': 'SD',
    'Tennessee': 'TN', 'Texas': 'TX',
    'Utah': 'UT',
    'Vermont': 'VT', 'Virgin Islands': 'VI', 'Virginia': 'VA',
    'Washington': 'WA', 'West Virginia': 'WV', 'Wisconsin': 'WI', 'Wyoming': 'WY',
    'NORTHERN MARIANA ISLANDS': 'MP',
    'MARSHALL ISLANDS': 'MH',
}

In [None]:
# Add the two-letter code of the worksite state; names that are not keys of
# `usa_states` become NaN.
df = df.assign(US_STATE_ABV=df['PRIMARY_WORKSITE_STATE'].map(usa_states))

In [None]:
# Derive the calendar year of each determination date as an integer column.
_determination_dates = pd.DatetimeIndex(df['DETERMINATION_DATE'])
df['YEAR'] = _determination_dates.year.astype(int)

In [None]:
# Summarise column dtypes and non-null counts after cleaning.
df.info(verbose=None)

## Data Visualization:

### Number of Applicants Across Years:

In [None]:
# Number of applications per visa class (missing classes are not counted).
df['VISA_CLASS'].value_counts(dropna=True)

In [None]:
# Build `temp`: one row per year 2011-2020, a 'YEAR' column, and one integer
# column per visa class holding that class's application count for the year
# (0 where a class has no applications in a year).
#
# The table is produced with a single pivot instead of per-cell filtering
# inside nested loops. The previous version also relied on a bare `except:`
# to turn "no rows" into 0, which would have hidden any other error as well.
years = list(range(2011, 2021))

# Long form: one row per (VISA_CLASS, YEAR) with the number of case numbers.
yr = (
    df.groupby(['VISA_CLASS', 'YEAR'])['CASE_NUMBER']
    .count()
    .reset_index()
    .sort_values('YEAR')
    .reset_index(drop=True)
)
classes = set(yr['VISA_CLASS'])

temp = (
    yr.pivot(index='YEAR', columns='VISA_CLASS', values='CASE_NUMBER')
    # Force every year in `years` to be present (and only those years), with
    # visa classes in a deterministic column order.
    .reindex(index=years, columns=sorted(classes))
    .fillna(0)
    .astype(int)
    .rename_axis(columns=None)
    .reset_index()
)

In [None]:
# Stacked bar chart: applications per year, one coloured layer per visa class.
colors = ['b', 'lightblue', 'teal', 'coral']
visa_layers = ['H-1B', 'PERM', 'H-2B', 'CW-1']

# Running height of the stack for each of the ten years.
l = np.zeros(10, dtype=int)

plt.figure(figsize=(20, 10))

plt.title('Number of Applicants Across Years', fontsize=18, fontweight='bold')
plt.xlabel('Years', fontsize=14)
plt.ylabel('Number of Applications', fontsize=14)
plt.xticks(list(range(2011, 2021)), fontsize=12)
plt.yticks(fontsize=12)
plt.ylim(0, 180000)

for layer_name, layer_color in zip(visa_layers, colors):
    layer_counts = np.array(temp[layer_name])
    plt.bar(temp['YEAR'], temp[layer_name], bottom=l, color=layer_color)
    # The next layer starts on top of everything drawn so far.
    l += layer_counts

plt.legend(visa_layers)

plt.show()

### Average Wage Rate across all VISA Classes:

In [None]:
# Mean wage rate per year (despite the variable name, this is an average).
df_MinWage = df.groupby('YEAR', as_index=False)['WAGE_RATE'].mean()

In [None]:
# `m` is the highest yearly mean wage; `v` holds the row(s) that reach it.
m = df_MinWage['WAGE_RATE'].max()
v = df_MinWage.loc[df_MinWage['WAGE_RATE'].eq(m)]

In [None]:
# Line chart of the yearly mean wage rate, with the maximum called out by a
# text label and an arrow annotation.
sns.set_style('darkgrid')
df_MinWage.plot(x='YEAR', y='WAGE_RATE', kind='line', color='red', figsize=(16, 8))

plt.title('Average Wage Rate Across all VISA Classes from 2011 - 2020', fontsize=18, fontweight='bold')
plt.xlabel('Years', fontsize=14)
plt.ylabel('Average Wage Rate', fontsize=14)
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)

# Label placed at a fixed x position, at the height of the maximum.
plt.text(2018.5, m, 'Maximum Average Wage', fontsize=14)

# Styling of the annotation box and its arrow.
bbox = {'boxstyle': "round", 'fc': 'None'}
arrowprops = {
    'arrowstyle': "->",
    'connectionstyle': "angle, angleA = 0, angleB = 90,\
    rad = 10",
    'color': 'black',
}

# The annotation text is offset from the data point by multiples of `offset`
# (in points).
offset = -70

# Data point being annotated: the year of the maximum and the maximum itself.
xdata, ydata = v['YEAR'].iloc[0], m

plt.annotate(
    (xdata, ydata),
    (xdata, ydata),
    xytext=(2 * offset, 4 * offset),
    textcoords='offset points',
    bbox=bbox,
    arrowprops=arrowprops,
)

plt.show()

### Distribution of the number of applications across US States

In [None]:
# Application counts per worksite state (name + abbreviation) and year.
_state_year_keys = ['PRIMARY_WORKSITE_STATE', 'US_STATE_ABV', 'YEAR']
df_state_cases = df.groupby(_state_year_keys, as_index=False)['CASE_NUMBER'].count()
df_state_cases.head()

In [None]:
# US choropleth of the total number of applications per state.
#
# `df_state_cases` holds one row per state AND year, so passing it straight to
# the trace supplies several z values for the same location. The counts are
# summed over all years first so that each state appears once and the colour
# really is the "Total Number of Applications" named on the colour bar.
state_totals = (
    df_state_cases
    .groupby('US_STATE_ABV', as_index=False)['CASE_NUMBER']
    .sum()
)

fig = go.Figure(data=go.Choropleth(
    locations=state_totals['US_STATE_ABV'],  # two-letter state codes
    z=state_totals['CASE_NUMBER'],           # value that drives the colour
    locationmode='USA-states',               # interpret `locations` as state codes
    colorscale='Reds',
    colorbar_title="Total Number of Applications",
))

fig.update_layout(
    title_text='Distribution of Number of Wage Applications across US States',
    geo_scope='usa',  # limit the map to the USA
)

fig.show()

### Distribution of the average wage rate across US States:

In [None]:
# Path of the GeoJSON file with the US state outlines used by the folium map.
usa_geo = 'USGeodata.json'

In [None]:
# Base folium map centred on latitude 48, longitude -102.
world_map = folium.Map(
    location=[48, -102],
    zoom_start=2.5,
)

In [None]:
# Mean wage rate per worksite state, rounded to zero decimal places.
df_state_wage = df.groupby('PRIMARY_WORKSITE_STATE')['WAGE_RATE'].mean().reset_index()
df_state_wage['WAGE_RATE'] = df_state_wage['WAGE_RATE'].round(0)

In [None]:
# Shade each state of the folium map by its mean wage rate.
#
# Uses the `folium.Choropleth` layer added to the map instead of the legacy
# `Map.choropleth` method, and corrects two misspelled keyword arguments:
# `fill_capacity` -> `fill_opacity` and `Highlight` -> `highlight`.
folium.Choropleth(
    geo_data=usa_geo,
    data=df_state_wage,
    columns=['PRIMARY_WORKSITE_STATE', 'WAGE_RATE'],
    # Join key: the `name` property of each GeoJSON feature is matched against
    # the first entry of `columns`.
    key_on='feature.properties.name',
    fill_color='YlOrRd',
    fill_opacity=0.7,
    highlight=True,
    legend_name="Average Wage Rate Range",
).add_to(world_map)
world_map

### Trend of number of applications from 2011-2020

In [None]:
# Animated US choropleth: application counts per state, one frame per year.
px.choropleth(
    df_state_cases,
    locations='US_STATE_ABV',
    locationmode='USA-states',
    scope="usa",
    color="CASE_NUMBER",
    color_continuous_scale="YlOrRd",
    animation_frame="YEAR",
    title='Trend of Number of Applications from 2011-2020',
    height=600,
)

### Average Experience in Months for each Visa Class

In [None]:
# Mean required experience (in months) per visa class.
df_VisaExp = df.groupby('VISA_CLASS', as_index=False)['EMP_EXPERIENCE_MONTHS'].mean()

In [None]:
# Bar chart of the mean experience per visa class, one colour per class.
# Only `x` is given; the `index` entry in the label mapping relabels the other
# axis.
_experience_axis_labels = {
    'index': "Visa Class",
    'EMP_EXPERIENCE_MONTHS': "Average Experience (Months)",
    'VISA_CLASS': "Visa Class",
}
fig = px.bar(
    df_VisaExp,
    x="EMP_EXPERIENCE_MONTHS",
    color="VISA_CLASS",
    color_discrete_sequence=px.colors.qualitative.Vivid,
    labels=_experience_axis_labels,
    title="Average Experience in Months for each Visa Class",
)
fig.update_layout(font_family="Rockwell")
fig.show()

### Case Status Distribution of Prevailing Wage Applications

In [None]:
# Application counts per case status; `df_Case` leaves out the
# 'Determination Issued' status.
df_CaseStat = df.groupby('CASE_STATUS')['CASE_NUMBER'].count().reset_index()
df_Case = df_CaseStat.loc[df_CaseStat['CASE_STATUS'].ne('Determination Issued')]
df_Case

In [None]:
# Bubble chart: one bubble per case status, positioned and sized by its count.
fig = px.scatter(
    df_Case,
    x="CASE_NUMBER",
    y="CASE_STATUS",
    color="CASE_STATUS",
    size="CASE_NUMBER",
    size_max=100,
    hover_name="CASE_STATUS",
    range_x=[0, 10000],
    color_discrete_sequence=px.colors.qualitative.G10,
    template="simple_white",
    title="Trend of Case Status",
)

# Horizontal legend without a title, anchored to the right above the plot.
_legend_layout = {
    'orientation': "h",
    'title': "",
    'y': 1.1,
    'x': 1,
    'xanchor': "right",
    'yanchor': "bottom",
}
fig.update_layout(font_family="Rockwell", legend=_legend_layout)
fig.show()

### Education Level Distribution for Prevailing Wage Class

In [None]:
# Build the education-level subset used by the pie chart:
#   1. drop rows whose level is the literal string 'None',
#   2. merge spelling variants of the same level,
#   3. drop the 'Other Degree' rows,
#   4. drop rows with a missing level.
#
# The subset is taken as an explicit copy and the cleaned column is assigned
# back. An in-place `replace` on a column pulled out of a filtered frame is a
# chained assignment: it triggers SettingWithCopyWarning and is not guaranteed
# to modify `df_PrimEdu` at all. Likewise `dropna()` returns a new object, so
# its result is now kept instead of being discarded.
_education_label_fixes = {
    'Other degree (JD, MD, etc.)': 'Other Degree',
    'Other Degree (JD, MD, etc.)': 'Other Degree',
    'High School/GED ': 'High School/GED',
}

df_PrimEdu = df[df['PRIMARY_EDUCATION_LEVEL'] != 'None'].copy()
df_PrimEdu['PRIMARY_EDUCATION_LEVEL'] = (
    df_PrimEdu['PRIMARY_EDUCATION_LEVEL'].replace(_education_label_fixes)
)
df_PrimEdu = df_PrimEdu[df_PrimEdu['PRIMARY_EDUCATION_LEVEL'] != 'Other Degree']
df_PrimEdu = df_PrimEdu.dropna(subset=['PRIMARY_EDUCATION_LEVEL'])

# Show the cleaned column as the cell output.
df_PrimEdu['PRIMARY_EDUCATION_LEVEL']

In [None]:
# Application counts per education level, with the count column renamed.
df_Edu = (
    df_PrimEdu
    .groupby('PRIMARY_EDUCATION_LEVEL')['CASE_NUMBER']
    .count()
    .reset_index()
    .rename(columns={'CASE_NUMBER': 'Total Number of Applications'})
)

In [None]:
# Pie chart of the share of applications per education level.
colors_list = ['gold', 'yellowgreen', 'lightcoral', 'lightskyblue', 'navy']
# One explode offset per wedge. The list is sized from the data because
# matplotlib requires `explode` to have exactly as many entries as there are
# wedges; a fixed five-element list fails whenever the number of education
# levels is not five.
explode_list = [0.1] * len(df_Edu)
textprops = {"fontsize": 14}

df_Edu['Total Number of Applications'].plot(kind='pie', figsize=(16, 8), autopct='%1.1f%%',
                            startangle=90, shadow=True,  labels=None, y = "Total Number of Applications",
                            pctdistance = 1.12, colors=colors_list, explode = explode_list, textprops=textprops)

plt.title('Education Level Distribution for Wage Applications across Years', y = 1.12, fontsize = 18, fontweight = 'bold')
plt.axis('equal')
plt.ylabel('Total Number of Applications', fontsize = 14)
# Wedge labels are suppressed above (labels=None); the legend names the levels.
plt.legend(labels = df_Edu['PRIMARY_EDUCATION_LEVEL'], loc='upper left')

plt.show()

### Proportion of Wage Applications and Visa Classes US State-wise:

In [None]:
# Application counts per (visa class, case status, employer state), leaving
# out the 'Determination Issued' status and renaming the count column.
_sunburst_levels = ["VISA_CLASS", "CASE_STATUS", "EMPLOYER_STATE"]
_counts_by_level = df.groupby(_sunburst_levels)['CASE_NUMBER'].count().reset_index()
df_EmpState = (
    _counts_by_level
    .loc[_counts_by_level['CASE_STATUS'].ne('Determination Issued')]
    .rename(columns={'CASE_NUMBER': 'Total Number of Applications'})
)

In [None]:
# Sunburst drilling down visa class -> case status -> employer state;
# segment size and colour both encode the number of applications.
px.sunburst(
    df_EmpState,
    path=["VISA_CLASS", "CASE_STATUS", "EMPLOYER_STATE"],
    values="Total Number of Applications",
    color="Total Number of Applications",
    color_continuous_scale="RdBu",
)

### Distribution of the Count of Wage Applications across the Globe in 2020

In [None]:
# Reload the full FY2020 disclosure workbook; `df_2020` was narrowed to the
# common columns earlier in the notebook.
df_2020 = pd.read_excel(io='./Data Set/Disclosure Files/PW_Disclosure_Data_FY2020.xlsx')

In [None]:
# Keep only the columns needed for the per-country breakdown and preview them.
df_2020 = df_2020[[
    'CASE_NUMBER',
    'CASE_STATUS',
    'VISA_CLASS',
    'REQUESTOR_POC_COUNTRY',
]]
df_2020.head()

In [None]:
# Number of FY2020 applications per requestor point-of-contact country.
df_2020_VisaDist = df_2020.groupby("REQUESTOR_POC_COUNTRY", as_index=False)['CASE_NUMBER'].count()

In [None]:
# Title-case the country names and rename the count column.
df_2020_VisaDist = (
    df_2020_VisaDist
    .assign(REQUESTOR_POC_COUNTRY=df_2020_VisaDist['REQUESTOR_POC_COUNTRY'].str.title())
    .rename(columns={'CASE_NUMBER': 'Total Number of Applications'})
)

In [None]:
# Three-letter country codes for the world map, as a single-column frame.
#
# Two entries were not ISO 3166-1 alpha-3 codes and have been corrected:
# 'DDR' -> 'DEU' (Germany) and 'UAE' -> 'ARE' (United Arab Emirates).
# NOTE(review): this list is joined to the per-country counts purely by row
# position in the next cell, so it presumably mirrors the alphabetical order
# of the countries in that table -- confirm the order and the length (26)
# against the data.
abr = [
    'AFG', 'ARG', 'AUS', 'BRA', 'CAN', 'CHL', 'CHN', 'COL', 'HRV', 'GEO',
    'DEU', 'IND', 'ISR', 'JAM', 'MEX', 'MDA', 'NOR', 'PAK', 'PHL', 'KNA',
    'SRB', 'UKR', 'ARE', 'GBR', 'USA', 'VEN',
]
iso_alpha = pd.DataFrame({'iso_alpha': abr})

In [None]:
# Put the country codes next to the per-country counts, side by side.
# The two frames are aligned on their row index, not on any country key.
frames = [df_2020_VisaDist, iso_alpha]
df_2020_VisaDist_plot = pd.concat(frames, axis='columns')
df_2020_VisaDist_plot.head(n=5)

In [None]:
# World choropleth of the FY2020 application counts per country.
fig = px.choropleth(
    df_2020_VisaDist_plot,
    locations='iso_alpha',
    color='Total Number of Applications',
    hover_name='REQUESTOR_POC_COUNTRY',
    height=500,
)

# Map styling: no frame, no coastlines, equirectangular projection.
_geo_layout = {
    'showframe': False,
    'showcoastlines': False,
    'projection_type': 'equirectangular',
}

# Source note placed in paper coordinates near the bottom of the figure.
# NOTE(review): the note credits the CIA World Factbook, but the plotted data
# is read from the FY2020 disclosure workbook -- confirm the attribution.
_source_note = {
    'x': 0.55,
    'y': 0.1,
    'xref': 'paper',
    'yref': 'paper',
    'text': 'Source: <a href="https://www.cia.gov/library/publications/the-world-factbook/fields/2195.html">\
            CIA World Factbook</a>',
    'showarrow': False,
}

fig.update_layout(
    title_text='Distribution of the Count of Wage Applications across the Globe in 2020',
    geo=_geo_layout,
    annotations=[_source_note],
)