In [1]:
from pathlib import Path

import pandas as pd # Loading up pandas to load the excel files into dataframes
pd.set_option("display.max_rows", None) # So that the jupyter notebook prints all of the rows instead of truncating them

## Defining a `Dataset` class to make loading files into DataFrames easier

In [2]:
class Dataset():
    """Load a tabular file into a pandas DataFrame and register the resulting object.

    Every successfully constructed instance is appended to the class-level
    ``Dataset.Datasets`` list, which is shared by all instances.

    Attributes:
        type: File type string exactly as passed in by the caller.
        path: Location of the file that was read.
        name: Human-readable label for the dataset.
        df: DataFrame holding the loaded file contents.
        columns: Column names of ``df`` at load time (a plain list; it is not
            updated automatically if ``df`` is later replaced or modified).
    """
    Datasets = [] # List of all datasets
    def __init__(self, type, path, name):
        """Read the file at ``path`` and record the new object in ``Dataset.Datasets``.

        Args:
            type: "Excel" or "csv" (matched case-insensitively).
            path: File path handed to the pandas reader.
            name: Label used to refer to this dataset.

        Raises:
            ValueError: If ``type`` is not one of the supported file types.
        """
        self.type = type # filetype so we can load it using the correct function
        self.path = path # file path so we can access the file from its location if it isn't in the same directory
        self.name = name # File name to be referred to when working

        # Compare case-insensitively so "CSV" or "excel" work as well as the original spellings
        kind = str(self.type).strip().lower()
        if kind == "excel":
            self.df = pd.read_excel(self.path) # Loads up the file if it's in .xlsx format
        elif kind == "csv":
            self.df = pd.read_csv(self.path) # Loads up the file if it's in .csv format
        else:
            # Without this branch self.df would never be set and the failure would
            # surface below as an unrelated AttributeError
            raise ValueError(
                f"Unsupported dataset type {type!r}; expected 'Excel' or 'csv'"
            )

        self.columns = list(self.df.columns.values) # Makes a list of all the column names

        Dataset.Datasets.append(self) # Adds the object to the list of the datasets

## Loading up the dataset

In [3]:
# Wrap the digital-connectivity CSV in a Dataset object (constructing it reads the file)
Digital_Connectivity = Dataset(
    type="csv",
    path="./digitalconnectivity.csv",
    name="Digital Connectivity",
)
dfDC = Digital_Connectivity # Shorter alias for the same object; the descriptive name stays available

In [4]:
# Deleting rows / columns that weren't needed, keeping only the data needed for the presentation
COLUMNS_TO_DROP = ["ISO3","Sub-region", "Residence", "Unnamed: 7", "Wealth quintile", "Unnamed: 9", "Source", "Unnamed: 11"]

# errors="ignore" lets this cell be re-run safely: on a second run the columns
# and the row labelled 0 are already gone, which would otherwise raise KeyError.
# NOTE(review): row 0 is presumably a secondary header row in the raw CSV - confirm against the file.
dfDC.df = (
    dfDC.df
    .drop(columns=COLUMNS_TO_DROP, errors="ignore")
    .drop(index=[0], errors="ignore")
)

# The Dataset object cached its column names at load time; refresh them so they match the trimmed frame
dfDC.columns = list(dfDC.df.columns)

dfDC.df.head() # Printing to make sure the dropping went correct

Unnamed: 0,Countries and areas,Region,Income Group,Total
1,Algeria,MENA,Upper middle income (UM),24%
2,Angola,SSA,Lower middle income (LM),17%
3,Argentina,LAC,Upper middle income (UM),40%
4,Armenia,ECA,Upper middle income (UM),81%
5,Bangladesh,SA,Lower middle income (LM),37%


## Exporting

In [5]:
# Folder that receives one Excel workbook per region; created up front so the
# first write does not fail when the folder is missing.
EXPORT_DIR = Path("./DigitalConnectivity")
EXPORT_DIR.mkdir(parents=True, exist_ok=True)

# Unique region names, to export countries by region. Missing values are dropped
# first: a NaN region never equals itself, so it would only yield an empty "nan.xlsx".
Regions = dfDC.df["Region"].dropna().unique()

for name in Regions: #For every region:
    region_rows = dfDC.df.loc[dfDC.df["Region"] == name] #Rows of the countries in that region
    filename = EXPORT_DIR / f"{name}.xlsx" #name of file and its desired path, as an excel file

    # to_excel writes a real workbook; the previous to_csv call put CSV text into
    # a file named .xlsx. pandas needs an Excel writer engine (e.g. openpyxl) installed.
    region_rows.to_excel(filename, index=False) #Export to filename, without index