In [34]:
import pandas as pd

# Read the cleaned indicator table from disk into `df`.
df = pd.read_csv(filepath_or_buffer='datasets/cleandata.csv')

In [2]:
# Inspect the column labels of the loaded table.
df.columns

Index(['Unnamed: 0', 'Country Name', 'Country Code', 'Series Name', '2014',
       '2015', '2016', '2017', '2018', '2019'],
      dtype='object')

In [3]:
# List the distinct values of the 'Series Name' column (the available indicators).
df['Series Name'].unique()

array(['Cost of business start-up procedures (% of GNI per capita)',
       'Cost of business start-up procedures, female (% of GNI per capita)',
       'Cost of business start-up procedures, male (% of GNI per capita)',
       'Time required to start a business (days)',
       'Time required to start a business, female (days)',
       'Time required to start a business, male (days)',
       'Unemployment with advanced education (% of total labor force with advanced education)',
       'Unemployment with advanced education, female (% of female labor force with advanced education)',
       'Unemployment with advanced education, male (% of male labor force with advanced education)',
       'Unemployment with basic education (% of total labor force with basic education)',
       'Unemployment with basic education, female (% of female labor force with basic education)',
       'Unemployment with basic education, male (% of male labor force with basic education)',
       'Unemployment with 

In [4]:
# 2019 column for the import border-compliance indicator, selected with one
# .loc call (boolean row mask + column label) instead of chained indexing.
df.loc[df['Series Name'] == "Time to import, border compliance (hours)", "2019"]

20               96
42               10
64         209.5714
86               72
108        61.33333
           ...     
5146    126.1642571
5168    126.7982627
5190    126.1642571
5212    45.13404612
5234    66.94549528
Name: 2019, Length: 238, dtype: object

In [26]:
# Distinct raw 2015 entries for the customs-clearance indicator.
df.loc[
    df['Series Name'] == "Average time to clear exports through customs (days)",
    "2015",
].unique()

array(['..', '12.7', '7.7', '8.3', '6.3', '14.5', '6.9'], dtype=object)

In [35]:
# Identifier columns followed by the six yearly columns ('2014' .. '2019').
cols_to_keep = ['Country Name', 'Series Name'] + [str(y) for y in range(2014, 2020)]

# Unpivot wide -> long: each year column becomes rows with the former column
# label stored in 'year' and the cell content stored in 'value'.
df_melted = df[cols_to_keep].melt(
    id_vars=['Country Name', 'Series Name'],
    var_name='year',
    value_name='value',
)

df_melted.head()


Unnamed: 0,Country Name,Series Name,year,value
0,Afghanistan,Cost of business start-up procedures (% of GNI...,2014,15.1
1,Afghanistan,"Cost of business start-up procedures, female (...",2014,15.1
2,Afghanistan,"Cost of business start-up procedures, male (% ...",2014,15.1
3,Afghanistan,Time required to start a business (days),2014,8.5
4,Afghanistan,"Time required to start a business, female (days)",2014,9.0


In [37]:
# Convert the 'value' column to numbers. errors='coerce' turns entries that
# cannot be parsed (e.g. the '..' placeholder seen in the raw data) into NaN.
df_melted['value'] = pd.to_numeric(df_melted['value'], errors='coerce')

In [38]:
# Check the type of a single entry of the converted 'value' column.
type(df_melted['value'][1])

numpy.float64

In [45]:
# Check the type of a single 'year' entry (the former column labels after melt).
type(df_melted['year'][1])

str

In [50]:
# Persist the long-format table; index=False leaves the row index out of the file.
df_melted.to_csv('datasets/melted.csv', index=False)

In [57]:
import altair as alt

countries = df_melted['Country Name'].unique()
years = ['2014', '2015', '2016', '2017', '2018']

# Rows of the customs-clearance indicator for the chosen countries and years,
# keeping only values strictly below 10.
countries_years_series_filtered = df_melted.loc[
    df_melted['Country Name'].isin(countries)
    & df_melted['year'].isin(years)
    & df_melted['Series Name'].eq("Average time to clear exports through customs (days)")
    & df_melted['value'].lt(10)
]

# One bar per country, coloured by country, with one facet column per year.
chart = (
    alt.Chart(countries_years_series_filtered)
    .mark_bar()
    .encode(
        x=alt.X('Country Name'),
        y=alt.Y('value'),
        color='Country Name',
        column=alt.Column('year', header=alt.Header(title='Year')),
        tooltip='year',
    )
)

chart

In [32]:
# Re-check the type of a 'year' entry in the melted frame.
type(df_melted['year'][1])

str

In [34]:
# Preview the first rows of the filtered selection that feeds the bar chart.
countries_years_series_filtered.head()

Unnamed: 0,Country Name,Series Name,year,value
16,Afghanistan,Average time to clear exports through customs ...,2014,8.1
1666,India,Average time to clear exports through customs ...,2014,5.8
2590,Myanmar,Average time to clear exports through customs ...,2014,4.4
2612,Namibia,Average time to clear exports through customs ...,2014,8.2
2744,Nigeria,Average time to clear exports through customs ...,2014,6.0


In [1]:
import plotly.graph_objects as go

# Angular axis labels shared by both demo traces.
categories = ['processing cost','mechanical properties','chemical stability',
              'thermal stability', 'device integration']

# One filled radar polygon per product, built in a single pass and handed to
# the figure constructor.
fig = go.Figure(data=[
    go.Scatterpolar(r=radii, theta=categories, fill='toself', name=label)
    for label, radii in (
        ('Product A', [1, 5, 2, 2, 3]),
        ('Product B', [4, 3, 2.5, 1, 2]),
    )
])

# Show the radial axis with a fixed 0-5 scale and hide the legend.
fig.update_layout(
    polar={'radialaxis': {'visible': True, 'range': [0, 5]}},
    showlegend=False,
)

fig.show()

In [2]:
import pandas as pd

# Reload the long-format table that an earlier cell wrote to disk.
df = pd.read_csv(filepath_or_buffer="datasets/melted.csv")

In [41]:
# Check which container type unique() returns for the 'year' column.
type(df['year'].unique())

numpy.ndarray

In [11]:
import math

In [51]:
# Check the type of a 'year' entry in the frame reloaded from CSV
# (the equivalent check on df_melted is kept commented out below).
#type(df_melted['year'][1])
type(df['year'][1])

numpy.int64

In [12]:
import plotly.graph_objects as go

# Radar chart of the overall Logistics Performance Index (LPI): one trace per
# country (first five countries), one spoke per year.
lpi_series = "Logistics performance index: Overall (1=low to 5=high)"

# The reloaded CSV yields integer years; cast to str so the years act as
# category labels and match the string year lists other cells filter with.
df['year'] = df['year'].astype(str)

categories = df['year'].unique()
countries = df['Country Name'].unique()[0:5]

# Filter the LPI rows once instead of rebuilding the same mask for every use.
lpi = df[df["Series Name"] == lpi_series]

# Integer bounds for the radial axis. Deliberately NOT named `max` / `min`:
# assigning to those names shadows the built-ins for the whole kernel session.
lpi_max = math.ceil(lpi["value"].max())
lpi_min = math.floor(lpi["value"].min())

fig = go.Figure()

for country in countries:
    country_lpi = lpi[(lpi["Country Name"] == country) &
                      (lpi["year"].isin(categories))]
    fig.add_trace(go.Scatterpolar(
        # Rows are expected in the same order as `categories` — one per year.
        r=country_lpi["value"].values.tolist(),
        theta=categories,
        fill='toself',
        name=country,
        # Years without a value are NaN; bridge them so the polygon still closes.
        connectgaps=True
    ))

# Radial axis spans the rounded-out range of all LPI values in the data.
fig.update_layout(
  polar=dict(
    radialaxis=dict(
      visible=True,
      range=[lpi_min, lpi_max]
    )),
  showlegend=False
)

fig.show()

In [54]:
# Check the type of a 'year' entry in `df` again.
type(df['year'][1])

str

In [14]:
# Print the container type of each country's LPI selection.
# NOTE: the loop variable `i` stays bound to the last country afterwards,
# and other cells in this notebook read it.
for i in countries:
    print(type(df.loc[
        (df["Series Name"] == "Logistics performance index: Overall (1=low to 5=high)")
        & (df["Country Name"] == i)
        & df["year"].isin(categories),
        "value",
    ]))

<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>


In [22]:
# Overall LPI values for Albania across all years, as a plain Python list.
df.loc[
    (df["Series Name"] == "Logistics performance index: Overall (1=low to 5=high)")
    & (df["Country Name"] == "Albania")
    & df["year"].isin(categories),
    "value",
].values.tolist()

[nan, nan, 2.412498, nan, 2.66, nan]

In [26]:
import plotly.graph_objects as go

# Dot plot of export times: one trace per country, one marker per
# (compliance type, year) observation.

# Maps each export-time indicator to the short label shown on the y-axis.
# Named `compliance_labels` rather than `type` so the built-in type() that
# other cells call is not shadowed.
compliance_labels = {
    'Time to export, border compliance (hours)': "border compliance",
    'Time to export, documentary compliance (hours)': "documentary compliance",
}

# Defined here so the cell does not rely on another cell having set `years`.
years = ["2014", "2015", "2016", "2017", "2018"]

# Select both indicators for the chosen years once. astype(str) lets the year
# filter work whether the column currently holds integers or strings.
export_times = df[
    df["Series Name"].isin(list(compliance_labels))
    & df["year"].astype(str).isin(years)
]

fig = go.Figure()

# Add the trace inside the loop so every country in `countries` is drawn.
for country in countries:
    country_rows = export_times[export_times["Country Name"] == country]
    # x and y come from the same rows, so their lengths always match.
    fig.add_trace(go.Scatter(
        x=country_rows["value"].values.tolist(),
        y=country_rows["Series Name"].map(compliance_labels).values.tolist(),
        mode="markers",
        name=country,
    ))

fig.update_layout(xaxis_title="Time (Hours)")

fig.show()

In [28]:
# Border-compliance export times for Afghanistan over the selected years.
df.loc[
    (df["Series Name"] == 'Time to export, border compliance (hours)')
    & (df["Country Name"] == "Afghanistan")
    & df["year"].isin(years),
    "value",
].values.tolist()

[48.0, 48.0, 48.0, 48.0, 48.0]

In [29]:
# List the distinct country names present in the data.
df["Country Name"].unique()

array(['Afghanistan', 'Albania', 'Algeria', 'Angola',
       'Antigua and Barbuda', 'Argentina', 'Armenia', 'Australia',
       'Austria', 'Azerbaijan', 'Bahamas, The', 'Bahrain', 'Bangladesh',
       'Barbados', 'Belarus', 'Belgium', 'Belize', 'Benin', 'Bhutan',
       'Bolivia', 'Bosnia and Herzegovina', 'Botswana', 'Brazil',
       'Brunei Darussalam', 'Bulgaria', 'Burkina Faso', 'Burundi',
       'Cabo Verde', 'Cambodia', 'Cameroon', 'Canada',
       'Central African Republic', 'Chad', 'Chile', 'China', 'Colombia',
       'Comoros', 'Congo, Dem. Rep.', 'Congo, Rep.', 'Costa Rica',
       "Cote d'Ivoire", 'Croatia', 'Cyprus', 'Czechia', 'Denmark',
       'Djibouti', 'Dominica', 'Dominican Republic', 'Ecuador',
       'Egypt, Arab Rep.', 'El Salvador', 'Equatorial Guinea', 'Eritrea',
       'Estonia', 'Eswatini', 'Ethiopia', 'Fiji', 'Finland', 'France',
       'Gabon', 'Gambia, The', 'Georgia', 'Germany', 'Ghana', 'Greece',
       'Grenada', 'Guatemala', 'Guinea', 'Guinea-Bissau', 'G

In [21]:
# for country in countries:
#     for year in years:
#         # fig.add_trace(go.Scatter(
#         # x=
#         df[(df["Series Name"]=='Time to export, border compliance (hours)') &
#             (df["Country Name"]==i) &
#             (df["year"]==year)]["value"].values.tolist(),
#         # y=type,
#         # #marker=dict(color="crimson", size=12),
#         # mode="markers",
#         # name=i,
#         # ))
# #fig.show()

In [16]:
years = ["2014", "2015", "2016", "2017", "2018"]

# Both export-time indicators. isin() expresses "either series" as ONE mask.
# The previous `A | B & C & D` form parsed as `A | (B & C & D)` because `&`
# binds tighter than `|`, so the country/year filters never applied to the
# border-compliance rows.
export_series = [
    'Time to export, border compliance (hours)',
    'Time to export, documentary compliance (hours)',
]

# Last entry of `countries` — the value the leaked loop variable `i` held —
# taken explicitly so the cell does not depend on a previously run loop.
country = countries[-1]

# astype(str) keeps the year filter working for integer or string year columns.
df.loc[
    df["Series Name"].isin(export_series)
    & (df["Country Name"] == country)
    & df["year"].astype(str).isin(years),
    "value",
].values.tolist()

[61.33333,
 50.66667,
 61.33333,
 50.66667,
 61.33333,
 50.66667,
 61.33333,
 50.66667,
 61.33333,
 50.66667]