In [None]:
import pandas as pd
import numpy as np
import geopandas as gpd
import matplotlib.pylab as plt
from dataprep.clean import clean_country
import pygal
import folium

In [None]:
# Merge the two CSV files into a single data frame.
# The inner join on "Article No." keeps only article numbers present in both files
# and produces one row for every matching pair of rows.
articleInfo = pd.read_csv("articleInfo.csv")
authorInfo = pd.read_csv("authorInfo.csv")

# DataFrame.fillna returns a NEW frame; the result has to be assigned, otherwise
# mergedInfo keeps its NaNs and only the displayed copy is filled.
mergedInfo = pd.merge(
    articleInfo,
    authorInfo,
    on="Article No.",
    how="inner",
).fillna(0)

# show the merged (and filled) frame as the cell output
mergedInfo


In [None]:
# plot the yearly publication figure: x-axis = year, y-axis = # articles published during that year

# mergedInfo is the result of an inner merge on "Article No.", so an article shows up
# once for every matching author row. Keep a single row per article so that each
# publication is counted exactly once, matching the y-axis label.
articles = mergedInfo.drop_duplicates(subset="Article No.")

# value_counts tallies all years in one pass (calling list.count() for every row is
# quadratic) and ignores missing years; sort_index puts the x-axis in chronological order.
pubs_per_year = articles["Year"].value_counts().sort_index()

fig, ax = plt.subplots()
ax.plot(pubs_per_year.index, pubs_per_year.values)
ax.set_xlabel('Year')
ax.set_ylabel('Number of Articles Published')
ax.set_title("yearly_publication")
plt.show()

In [None]:
# plot the yearly citation figure: x-axis = year, y-axis = total # of citations in that year
# NOTE(review): mergedInfo can contain several rows for the same article (one per matching
# author row). If "Citation" is a per-article value it is added once per such row —
# confirm this is the intended total.

# sum the citations of all rows sharing a year; groupby returns the years in sorted order
citations_by_year = mergedInfo[["Year", "Citation"]].groupby("Year")["Citation"].sum()

# plot the data
plt.plot(citations_by_year.index, citations_by_year.values)
plt.title("yearly_citation")
plt.ylabel("Total No. of Citations")
plt.xlabel("Year")
plt.show()


In [None]:
# plot the number of publications across countries

# Count rows per country. Only string entries are kept so that missing values (NaN)
# or non-text placeholders never reach the country cleaner.
# NOTE(review): this counts merged rows, i.e. one per article/author match — confirm
# that this is the intended definition of "publications" per country.
country_col = mergedInfo["Country"]
country_col = country_col[country_col.map(lambda value: isinstance(value, str))]
df = (
    country_col.value_counts()
    .rename_axis("Countries")
    .reset_index(name="Number of Publications")
)

# normalise the raw spellings; dataprep writes its result to a new "<column>_clean" column
df = clean_country(df, "Countries",  input_format=("name", "official", "alpha-3"))
# get the alpha-3 code for the country so the counts can be joined onto the map
df = clean_country(df, "Countries_clean", output_format="alpha-3")
# countries that could not be resolved come back as NaN -> remove
df = df.dropna()
print(df)

# Different raw spellings can resolve to the same country, so add their counts
# together to get exactly one row per alpha-3 code before joining.
ISO_COL = "Countries_clean_clean"  # column produced by the second clean_country call
pubs_by_iso = (
    df.groupby(ISO_COL, as_index=False)["Number of Publications"].sum()
    .rename(columns={ISO_COL: "iso_a3"})
)

# NOTE(review): geopandas.datasets is deprecated in geopandas 0.14 and removed in 1.0;
# on newer versions the Natural Earth files have to be loaded from another source.
world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
cities = gpd.read_file(gpd.datasets.get_path('naturalearth_cities'))

# Attach the publication counts to the country polygons; countries without any
# publication keep their shape and get a count of 0.
# NOTE(review): a few rows of naturalearth_lowres reportedly carry iso_a3 == "-99"
# and would therefore not match — verify against the printed table above.
world_pubs = world.merge(pubs_by_iso, on="iso_a3", how="left")
world_pubs["Number of Publications"] = (
    world_pubs["Number of Publications"].fillna(0).astype(int)
)

# natural breaks cannot form more classes than there are distinct values
n_classes = min(10, world_pubs["Number of Publications"].nunique())

m = world_pubs.explore(
     column="Number of Publications",  # make choropleth based on the publication count
     scheme="naturalbreaks",  # use mapclassify's natural breaks scheme
     legend=True, # show legend
     k=n_classes, # use up to 10 bins
     legend_kwds=dict(colorbar=False), # do not use colorbar
     tooltip=["name", "Number of Publications"], # show country name and count on hover
     name="countries" # name of the layer in the map
)

cities.explore(
     m=m, # pass the map object
     color="red", # use red color on all points
     marker_kwds=dict(radius=10, fill=True), # make marker radius 10px with fill
     tooltip="name", # show "name" column in the tooltip
     tooltip_kwds=dict(labels=False), # do not show column label in the tooltip
     name="cities" # name of the layer in the map
)

# NOTE(review): newer folium releases may no longer ship the Stamen tile sets — confirm
# this layer still loads in the target environment.
folium.TileLayer('Stamen Toner', control=True).add_to(m)  # use folium to add alternative tiles
folium.LayerControl().add_to(m)  # use folium to add layer control

m  # show map
  
