In [1]:
#Importing libraries
import pandas as pd
from pathlib import Path

In [2]:
# Locate and load the censorship-by-country table
censorship_data_csv = Path("Resources/Censorship_Data_by_Country.csv")
censorship_data = pd.read_csv(censorship_data_csv, encoding="utf-8")

# Locate and load the 2023 country population table
population_data_csv = Path("Resources/Country_Population_2023.csv")
population_data = pd.read_csv(population_data_csv, encoding="utf-8")

In [3]:
# Preview the first five rows of the censorship table
censorship_data.head(n=5)

Unnamed: 0,Flag,Country,Score,Torrents Restricted?,Torrents Banned or Shut Down?,Pornography Restricted?,Pornography Banned?,Political Media Restricted?,Political Media Heavily Censored?,Social Media Restricted?,Social Media Banned?,VPNs Restricted,VPNs Banned?,Messaging and VoIP App Restrictions
0,:af:,Afghanistan,6,Y,N,Y,Y,Y,Y,Y,N,N,N,N
1,:al:,Albania,2,Y,N,N,N,Y,N,N,N,N,N,N
2,:dz:,Algeria,5,Y,N,Y,N,Y,Y,Y,N,N,N,N
3,:ad:,Andorra,1,Y,N,N,N,N,N,N,N,N,N,N
4,:ao:,Angola,2,Y,N,N,N,Y,N,N,N,N,N,N


In [4]:
# Preview the first five rows of the population table
population_data.head(n=5)

Unnamed: 0,#,Country (or dependency),Population (2023)
0,1,India,1428627663
1,2,China,1425671352
2,3,United States,339996563
3,4,Indonesia,277534122
4,5,Pakistan,240485658


In [5]:
# Clean the population table: rename the country column to "Country" (the
# name the censorship table uses, so the two can be merged on it) and discard
# the "#" column.
# errors="ignore" keeps this cell re-runnable: population_data is reassigned
# here, so on a second execution "#" is already gone and a plain drop would
# raise KeyError. rename() already skips labels that are absent.
population_data = (
    population_data
    .rename(columns={"Country (or dependency)": "Country"})
    .drop(columns=["#"], errors="ignore")
)
population_data.head()


Unnamed: 0,Country,Population (2023)
0,India,1428627663
1,China,1425671352
2,United States,339996563
3,Indonesia,277534122
4,Pakistan,240485658


In [6]:
# Attach each country's population to its censorship record.
# Left join on "Country": every censorship row is kept, and a country with no
# match in population_data gets NaN in the population column.
censor_data = censorship_data.merge(population_data, how="left", on="Country")
censor_data.head(n=5)

Unnamed: 0,Flag,Country,Score,Torrents Restricted?,Torrents Banned or Shut Down?,Pornography Restricted?,Pornography Banned?,Political Media Restricted?,Political Media Heavily Censored?,Social Media Restricted?,Social Media Banned?,VPNs Restricted,VPNs Banned?,Messaging and VoIP App Restrictions,Population (2023)
0,:af:,Afghanistan,6,Y,N,Y,Y,Y,Y,Y,N,N,N,N,42239854
1,:al:,Albania,2,Y,N,N,N,Y,N,N,N,N,N,N,2832439
2,:dz:,Algeria,5,Y,N,Y,N,Y,Y,Y,N,N,N,N,45606480
3,:ad:,Andorra,1,Y,N,N,N,N,N,N,N,N,N,N,80088
4,:ao:,Angola,2,Y,N,N,N,Y,N,N,N,N,N,N,36684202


In [7]:
# Load the supplementary internet data set
internet_data_csv = Path("Resources/internet-data-csv.csv")

# Columns kept from the file; everything else is discarded after reading
internet_columns = [
    "Country",
    "Edition",
    "4.1.4) Level of web accessibility",
    "4.2.1) Privacy regulations",
]

# Read the whole file, then narrow it to the columns listed above
internet_data = pd.read_csv(internet_data_csv).loc[:, internet_columns]

internet_data.head(n=5)

Unnamed: 0,Country,Edition,4.1.4) Level of web accessibility,4.2.1) Privacy regulations
0,Algeria,E1,0.0,0.0
1,Algeria,E2,0.0,0.0
2,Algeria,E3,0.0,2.0
3,Algeria,E4,0.0,2.0
4,Algeria,E5,0.0,2.0


In [8]:
# Keep only the "E5" edition rows (dropping the past-year editions E1-E4),
# then remove the Edition column, which is constant after the filter.
# The guard keeps this cell re-runnable: internet_data is reassigned here, so
# on a second execution "Edition" no longer exists and filtering on it would
# raise KeyError. The original row index is kept (it is not reset).
if "Edition" in internet_data.columns:
    internet_data = (
        internet_data
        .loc[internet_data["Edition"] == "E5"]
        .drop(columns=["Edition"])
    )

internet_data.head()

Unnamed: 0,Country,4.1.4) Level of web accessibility,4.2.1) Privacy regulations
4,Algeria,0.0,2.0
9,Angola,2.0,2.0
14,Argentina,2.0,2.0
19,Australia,2.0,2.0
24,Austria,3.0,2.0


In [9]:
# Add the internet columns to the combined censorship/population table.
# Left join on "Country": countries absent from internet_data keep their row
# and get NaN in the added columns.
all_data = censor_data.merge(internet_data, how="left", on="Country")
all_data.head(n=5)

Unnamed: 0,Flag,Country,Score,Torrents Restricted?,Torrents Banned or Shut Down?,Pornography Restricted?,Pornography Banned?,Political Media Restricted?,Political Media Heavily Censored?,Social Media Restricted?,Social Media Banned?,VPNs Restricted,VPNs Banned?,Messaging and VoIP App Restrictions,Population (2023),4.1.4) Level of web accessibility,4.2.1) Privacy regulations
0,:af:,Afghanistan,6,Y,N,Y,Y,Y,Y,Y,N,N,N,N,42239854,,
1,:al:,Albania,2,Y,N,N,N,Y,N,N,N,N,N,N,2832439,,
2,:dz:,Algeria,5,Y,N,Y,N,Y,Y,Y,N,N,N,N,45606480,0.0,2.0
3,:ad:,Andorra,1,Y,N,N,N,N,N,N,N,N,N,N,80088,,
4,:ao:,Angola,2,Y,N,N,N,Y,N,N,N,N,N,N,36684202,2.0,2.0


In [10]:
# Persist the combined table so other code can load it directly.
# The row index is written as the first column (pandas default).
all_data_csv = Path("Resources/all_data.csv")
all_data.to_csv(all_data_csv, encoding="utf-8")