In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the read-only Kaggle input directory and print the full path of every file in it.
input_paths = (
    os.path.join(folder, entry)
    for folder, _subfolders, entries in os.walk('/kaggle/input')
    for entry in entries
)
for input_path in input_paths:
    print(input_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Women in Entrepreneurship Dataset - Data Visualization Practice

In [None]:
# Plotting setup shared by every chart cell below.
import pandas as pd
# Explicitly register pandas' formatters/converters (dates, periods, ...) with matplotlib.
pd.plotting.register_matplotlib_converters()
import matplotlib.pyplot as plt
# IPython magic: render figures inline, directly under the cell that draws them.
%matplotlib inline
import seaborn as sns
# Simple confirmation that the imports above succeeded.
print("Setup Complete")

In [None]:
# Import and preview the data.
# The file is semicolon-delimited. The separator is passed by keyword because
# pandas 2.0+ accepts only the file path positionally (a positional ';' raises
# TypeError there, and emitted a FutureWarning from pandas 1.3 onwards).
data = pd.read_csv(
    '/kaggle/input/women-entrepreneurship-and-labor-force/Dataset3.csv',
    sep=';',
)
# Show the last rows as a quick sanity check that the columns parsed correctly.
data.tail()

In [None]:
# Separate the countries into the developed and the developing world.
# The 'No' column is dropped from both subsets: per the dataset notes it is just an
# alphabetical row number and holds no real significance for the analysis.
development_level = data['Level of development']
developed = data[development_level == 'Developed'].drop(columns='No')
developing = data[development_level == 'Developing'].drop(columns='No')

# Print basic high-level summary statistics for each group under its own heading.
for heading, group in (
    ("Developed World", developed),
    ("\n \nDeveloping World", developing),
):
    print(heading)
    print("_" * 90)
    print(group.describe())

In [None]:
# Aggregate feel of the data: inflation rate vs. Women Entrepreneurship Index,
# coloured by level of development and sized by female labor force participation.
#
# relplot is a figure-level function: it always creates its own figure, sized through
# `height` (and `aspect`). A preceding plt.figure(...) call is therefore not used by
# the plot and would only add an extra, empty figure to the cell output.
sns.relplot(
    x="Inflation rate",
    y="Women Entrepreneurship Index",
    hue="Level of development",
    size="Female Labor Force Participation Rate",
    sizes=(40, 400),
    alpha=.5,
    palette="muted",
    height=6,
    data=data,
)

# The relplot axes are the current axes at this point, so the title lands on the chart.
plt.title("Woman in Entrepreneurship")

## Takeaways
 - The developed world has much higher levels of female entrepreneurship
 - The developed world experiences lower inflation rates, which is conducive to higher levels of aggregate lending
 - Female labor force participation rates don't vary widely between the developed and developing world, though the developing world lags about 10% behind the developed world

# Inflation Rates

In [None]:
# Kernel density estimate of the inflation rate across developing countries.
plt.figure(figsize=(10, 10))

developing_inflation = developing['Inflation rate']
# kdeplot draws on the current axes and returns them; the labels are set on that
# Axes object directly.
# NOTE(review): newer seaborn releases deprecate `shade` in favour of `fill` --
# confirm the installed version before switching.
ax = sns.kdeplot(data=developing_inflation, shade=True)

ax.legend()
ax.set_title('Distribution of Inflation Rates Across Developing World')
ax.set_xlabel('Inflation Rate (%)')


In [None]:
# Kernel density estimate of the inflation rate across developed countries.
plt.figure(figsize=(10, 10))

developed_inflation = developed['Inflation rate']
# kdeplot draws on the current axes and returns them; the labels are set on that
# Axes object directly.
# NOTE(review): newer seaborn releases deprecate `shade` in favour of `fill` --
# confirm the installed version before switching.
ax = sns.kdeplot(data=developed_inflation, shade=True)

ax.legend()
ax.set_title('Distribution of Inflation Rates Across Developed World')
ax.set_xlabel('Inflation Rate (%)')

In [None]:
# Find the country with the highest inflation rate and show all of its rows.
# idxmax() returns the index label of the maximum, which is then used to look up the name.
highest_name = data.loc[data['Inflation rate'].idxmax(), 'Country']
highest = data.loc[data['Country'] == highest_name]
highest

In [None]:
# Find the country with the lowest inflation rate and show all of its rows.
# idxmin() returns the index label of the minimum, which is then used to look up the name.
lowest_name = data.loc[data['Inflation rate'].idxmin(), 'Country']
lowest = data.loc[data['Country'] == lowest_name]
lowest

In [None]:
# Percentage of countries whose inflation rate is below zero.
# The filter is on the 'Inflation rate' column, so the result is reported as
# inflation (not interest) rates.
neg_rates = data.Country[data['Inflation rate'] < 0]
percent_neg_inflation_rates = neg_rates.count() / data.Country.count() * 100
print("{} % of countries are running negative inflation rates".format(percent_neg_inflation_rates))

## Takeaways
 - Inflation rates vary much more in the developing world
     - its distribution is less regular (further from a normal shape)
 - Both distributions look roughly Gaussian in nature
 - 35% of countries have negative inflation rates

# Female Labor Participation Rate 

In [None]:
# Kernel density estimate of female labor force participation in developing countries.
plt.figure(figsize=(10, 10))

developing_participation = developing['Female Labor Force Participation Rate']
# kdeplot draws on the current axes and returns them; the labels are set on that
# Axes object directly.
# NOTE(review): newer seaborn releases deprecate `shade` in favour of `fill` --
# confirm the installed version before switching.
ax = sns.kdeplot(data=developing_participation, shade=True)

ax.legend()
ax.set_title('Distribution of Female Labor Force Participation Rate Developing World')
ax.set_xlabel('Female Labor Force Participation Rate (%)')

In [None]:
# Kernel density estimate of female labor force participation in developed countries.
plt.figure(figsize=(10, 10))

developed_participation = developed['Female Labor Force Participation Rate']
# kdeplot draws on the current axes and returns them; the labels are set on that
# Axes object directly.
# NOTE(review): newer seaborn releases deprecate `shade` in favour of `fill` --
# confirm the installed version before switching.
ax = sns.kdeplot(data=developed_participation, shade=True)

ax.legend()
ax.set_title('Distribution of Female Labor Force Participation Rate Developed World')
ax.set_xlabel('Female Labor Force Participation Rate (%)')

## Differences in Total Levels of Entrepreneurship

In [None]:
# Relationship between the overall Entrepreneurship Index and the Women
# Entrepreneurship Index: squared Pearson correlation plus a regression plot.
entrepreur = data['Entrepreneurship Index']
female_entrepreneurship = data["Women Entrepreneurship Index"]

# 2x2 Pearson correlation matrix of the two indices, as a NumPy array.
# DataFrame.corr ignores rows with a missing value in either column, which matches
# regplot (it drops missing observations by default). np.corrcoef would instead
# return NaN for the whole matrix if a single value were missing, so the R2 in the
# title could disagree with the fitted line shown in the plot.
pearsons_correlation_coeficients = (
    data[['Entrepreneurship Index', 'Women Entrepreneurship Index']]
    .corr(method='pearson')
    .to_numpy()
)

# Only the off-diagonal entry (index vs. women's index) matters; square it to get
# R2 and round to two decimals for display in the title.
r2 = round(pearsons_correlation_coeficients[0, 1] ** 2, 2)

plt.figure(figsize=(10, 10))

# Scatter plot with a fitted linear regression line, drawn on the figure created above.
sns.regplot(x="Entrepreneurship Index", y="Women Entrepreneurship Index", data=data)

plt.title("Women in Entrepreneur index vs Aggregate Entrepreneur Index // R2: {}".format(r2))

# NOTE(review): this message is printed unconditionally; it is not derived from r2.
print("Strong Correlation Exhibitted")

### Takeaways
 - Higher levels of overall entrepreneurship correlate with higher levels of women's entrepreneurship