In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        full_path = os.path.join(root, name)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

#  INTRODUCTION

In [None]:
import pandas as pd 
pd.options.display.max_columns = None
import matplotlib.pyplot as plt
import seaborn as sns

# Widen pandas' display limits so the very wide survey table can be inspected in full.
display_limits = {
    'display.max_columns': 355,
    'display.max_rows': 355,
    'display.max_colwidth': 200,
}
for option_name, option_value in display_limits.items():
    pd.set_option(option_name, option_value)

In [None]:
# Load the raw survey responses from the Kaggle input directory.
# Row 0 of this file is later treated as the question text for each column (see the cell that drops it).
data = pd.read_csv('/kaggle/input/kaggle-survey-2020/kaggle_survey_2020_responses.csv')
data.head()

In [None]:
#DROP THE FIRST ROW, WHICH HOLDS THE QUESTION TEXT RATHER THAN A RESPONSE
# `data` itself is left untouched so the question text can still be read from it later.
df = data.drop(index=0)
df.head()

# DATA SUMMARY

In [None]:
# (rows, columns) of the response table after the question row was removed.
df.shape

In [None]:
# Column dtypes, non-null counts and memory footprint.
df.info()

In [None]:
# Summary statistics, transposed so that each survey column becomes one row.
df.describe().transpose()

In [None]:
# Number of missing answers per column (transposing a Series is a no-op, so it is omitted).
df.isna().sum()

In [None]:
# Missing answers in the single-choice profile questions only.
profile_questions = ['Q1', 'Q2', 'Q3', 'Q4', 'Q5', 'Q6']
df.loc[:, profile_questions].isna().sum()

# DATA PREPARATION

In [None]:
#DICTIONARY OF HEADERS AND CORRESPONDING QUESTIONS
# Row 0 of the raw file is paired with the column headers, giving a header -> question lookup.
column_names = list(data.columns)
questions = data.loc[0, :].values.tolist()
questions_dict = {header: question for header, question in zip(column_names, questions)}
questions_dict

In [None]:
# Share of every gender answer among all rows (the denominator counts missing answers too).
df['Q2'].value_counts() / len(df['Q2'])

In [None]:
#CHANGE 'PREFER NOT TO SAY', 'PREFER TO SELF-DESCRIBE' AND 'NONBINARY' TO OTHER
def gender_grp(gen):
    """Collapse a gender answer into 'Man', 'Woman' or 'Other'.

    'Man' and 'Woman' are returned unchanged; every other value
    (including missing values) becomes 'Other'.
    """
    return gen if gen in ('Man', 'Woman') else 'Other'
# Regroup the gender column element by element, then list the remaining categories.
df['Q2'] = df['Q2'].map(gender_grp)
df['Q2'].unique()

In [None]:
#REPLACE THE LONG COUNTRY NAMES WITH SHORT READABLE ONES
# Literal (non-regex) substring replacement; the short names are the ones used by the continent mapping later.
short_country_names = {
    'United Kingdom of Great Britain and Northern Ireland': 'UK & NI',
    'United States of America': 'USA',
}
for long_name, short_name in short_country_names.items():
    df['Q3'] = df['Q3'].str.replace(long_name, short_name, regex=False)
df['Q3'].unique()


# PROFILE

The following four visualizations give a background understanding of participation in the survey based on the respondents' age, country, gender, and highest level of formal education.

Most of the survey respondents are young, with 20% aged 25-29. A further 18.8% and 17.3% are aged 18-21 and 22-24 respectively, which may indicate that many young people are taking an interest in the Data Science field. In the age groups 45-70+, participation barely reaches 5%.

India, USA, Brazil, Japan and Russia are the top 5 countries by participation. India is the highest of all countries with 29.2% of respondents, which is more than double the percentage of the USA that follows it (11.2%). Among the African countries that participated, only Nigeria made it into the top 7, with 2.4% of respondents, while the others are below 1%.

As expected, participation is predominantly male: approximately 78.8% of respondents are men, compared with only 19.4% women. This is not surprising, as many tech spaces suffer from low representation of women; however, it is concerning. Individuals in the category Other are those who answered Prefer not to say, Prefer to self-describe or Nonbinary, and their participation is below 2%.

It is pleasing to see that most participants have some level of higher education. Most participants have a Master's degree (39.2%), followed closely by those with a Bachelor's degree (34.8%).


In [None]:
# Respondents per age group, each bar annotated with its share of all respondents.
fig, ax = plt.subplots(figsize=(20,5))
# The column is passed as `x=`: since seaborn 0.12 the first positional argument of
# countplot is `data`, so a positional Series is no longer interpreted as the x variable.
# Sorting first makes the age groups appear in ascending order on the axis.
country = sns.countplot(x=df['Q1'].sort_values(), palette='mako_r', ax=ax)
_ = country.set(xlabel='AGE GROUPS')
_ = plt.xticks(rotation=45)
_ = plt.ylabel('COUNT')
n_respondents = df.shape[0]  # denominator for the percentage labels (all rows)
for p in ax.patches:
    ax.annotate("%.1f%%" % (100*float(p.get_height()/n_respondents)),
                (p.get_x() + p.get_width() / 2., abs(p.get_height())),
                ha='center', va='bottom', color='black', xytext=(0, 10), rotation='horizontal',
                textcoords='offset points')
_ = plt.show()

In [None]:
# Respondents per country of residence, most frequent first, annotated with each country's share.
fig, ax = plt.subplots(figsize=(20,5))
# `order` alone fixes the bar order, so the former sort_values() call was redundant.
# The column is passed as `x=`: since seaborn 0.12 the first positional argument of
# countplot is `data`, so a positional Series is no longer interpreted as the x variable.
country_order = df['Q3'].value_counts().index
country = sns.countplot(x=df['Q3'], order=country_order, palette='mako_r', ax=ax)
country.set(xlabel='COUNTRY OF RESIDENCE')
plt.xticks(rotation=45)
plt.ylabel('COUNT')
n_respondents = df.shape[0]  # denominator for the percentage labels (all rows)
for p in ax.patches:
    # Labels are written vertically because the bars are narrow and numerous.
    ax.annotate("%.1f%%" % (100*float(p.get_height()/n_respondents)),
                (p.get_x() + p.get_width() / 2., abs(p.get_height())),
                ha='center', va='bottom', color='black', xytext=(0, 10), rotation='vertical',
                textcoords='offset points')
plt.show()

In [None]:
# Gender split of all respondents as a pie chart.
# Labels and the exploded slice are derived from the index of value_counts() instead of
# being hard-coded, so they stay attached to the right slice whatever the frequency order is.
gender_counts = df['Q2'].value_counts()
display_names = {'Man': 'Male', 'Woman': 'Female'}
slice_labels = [display_names.get(gender, gender) for gender in gender_counts.index]
slice_offsets = [0.1 if gender == 'Woman' else 0 for gender in gender_counts.index]  # pull out the 'Woman' slice

fig, ax = plt.subplots(figsize=(7, 7))
ax.pie(gender_counts, explode=slice_offsets, labels=slice_labels, autopct='%1.1f%%', shadow=True, startangle=90)
ax.axis('equal')  # keep the pie circular
plt.show()


In [None]:
# Respondents per highest level of formal education, annotated with each level's share.
fig, ax = plt.subplots(figsize=(20,5))
# The column is passed as `x=`: since seaborn 0.12 the first positional argument of
# countplot is `data`, so a positional Series is no longer interpreted as the x variable.
country = sns.countplot(x=df['Q4'], palette='mako_r', ax=ax)
_ = country.set(xlabel='HIGHEST LEVEL OF FORMAL EDUCATION')
_ = plt.xticks(rotation=45)
_ = plt.ylabel('COUNT')

# The denominator is the number of all rows, so respondents who skipped the question
# are part of the base and the labels need not add up to 100%.
n_respondents = df.shape[0]
for p in ax.patches:
    ax.annotate("%.1f%%" % (100*float(p.get_height()/n_respondents)),
                (p.get_x() + p.get_width() / 2., abs(p.get_height())),
                ha='center', va='bottom', color='black', xytext=(0, 10), rotation='horizontal',
                textcoords='offset points')
_ = plt.show()

In [None]:
#DOES THE PARTICIPATION OF GENDER GROUPS DIFFER BY AGE?
# Share of each gender within every age group (each row sums to 1), drawn as stacked bars.
title='Proportion of gender groups for different ages of Data Science specialists'
age_by_gender = df.groupby('Q1')['Q2'].value_counts(normalize=True).unstack()

fig, ax = plt.subplots(figsize=(12,6))
age_by_gender.plot(kind='bar', stacked=True, rot=45, title=title, cmap=plt.get_cmap('Accent_r'), ax=ax)
ax.set(xlabel='Age group', ylabel='Proportion of Gender')
ax.legend(title='Gender')
plt.show()

# AFRICA VERSUS THE WORLD EDA

In [None]:
#CREATE A DATAFRAME WITH ONLY AFRICAN COUNTRIES
# Keep the respondents whose country of residence is one of the listed African countries.
africa = ['Nigeria', 'South Africa', 'Egypt', 'Morocco','Tunisia', 'Kenya', 'Ghana']
lives_in_africa = df['Q3'].isin(africa)
african_df = df[lives_in_africa]
african_df['Q3'].unique()

In [None]:
#MAP EACH COUNTRY TO ITS CONTINENT AND
#ADD A NEW COLUMN TO THE ORIGINAL DATAFRAME CALLED 'continent'
# Country names must match df['Q3'] exactly, i.e. after the 'UK & NI' / 'USA' renaming done earlier;
# a name that does not match maps to NaN and silently disappears from every continent plot.
# Fixes relative to the previous hand-written dict: 'Russsia' -> 'Russia', 'Ukrain' -> 'Ukraine',
# 'South Korea' moved from Europe to Asia, duplicate keys removed, and the stray leading
# space in ' North America' dropped. The mapping is now generated from the lists so the two cannot drift apart.
other = 'Other'
australia = 'Australia'
s_america = ['Brazil', 'Colombia', 'Peru', 'Chile', 'Argentina']
n_america = ['USA', 'Canada', 'Mexico']
africa = ['Nigeria', 'South Africa', 'Egypt', 'Morocco','Tunisia', 'Kenya', 'Ghana']
# Turkey was listed under both Asia and Europe; the later dict entry (Europe) was the one in effect, so it is kept here.
europe = ['UK & NI', 'Italy', 'France', 'Germany', 'Spain', 'Turkey', 'Ukraine', 'Poland', 'Ireland', 'Greece',
          'Sweden', 'Portugal', 'Netherlands', 'Belgium', 'Switzerland', 'Romania', 'Belarus']
# NOTE(review): the survey file may spell Iran with a trailing ellipsis; both spellings are listed — confirm against df['Q3'].unique().
asia = ['Russia', 'India', 'China', 'Japan', 'Pakistan', 'Iran, Islamic Republic of', 'Iran, Islamic Republic of...',
        'Indonesia', 'Taiwan', 'Philippines', 'Bangladesh', 'Sri Lanka', 'Singapore', 'Thailand', 'Viet Nam',
        'Malaysia', 'South Korea', 'Republic of Korea', 'Nepal', 'United Arab Emirates', 'Saudi Arabia', 'Israel']

continent_members = {
    'Asia': asia,
    'Europe': europe,
    'Africa': africa,
    'North America': n_america,
    'South America': s_america,
    'Australia': [australia],
    'Other': [other],
}
mapping = {country: continent for continent, countries in continent_members.items() for country in countries}
df['continent'] = df['Q3'].map(mapping)

# Surface any country that still has no continent instead of letting it vanish unnoticed.
unmapped = sorted(df.loc[df['continent'].isna(), 'Q3'].dropna().unique())
if unmapped:
    print('Countries without a continent:', unmapped)
df['continent'].unique()


Most of the questions asked in the survey consisted of different options to choose from. One individual could choose more than one answer from the options provided, hence the dataframe has multiple columns for the same question. The functions africa_melt() and continent_melt() melt a multiple-answer question into one column while keeping the single-answer questions in the melted dataframe throughout.
The crosstable() function cross-tabulates the two columns needed to answer a question, and bar_plot() then plots the result as a bar graph.

In [None]:

def africa_melt(list_1):
    """Reshape the columns named in `list_1` of `african_df` into long format.

    The single-answer columns listed below are carried along as identifiers;
    the melted columns end up in the default 'variable' / 'value' pair.
    """
    id_columns = ['Q1', 'Q2', 'Q3', 'Q4', 'Q5', 'Q6', 'Q8','Q15', 'Q24', 'Q25', 'Q38']
    return african_df.melt(id_vars=id_columns, value_vars=list_1)

def continent_melt(my_list):
    """Reshape the columns named in `my_list` of `df` into long format.

    Same identifier columns as the Africa-only variant, plus 'continent';
    the melted columns end up in the default 'variable' / 'value' pair.
    """
    id_columns = ['Q1', 'Q2', 'Q3','continent', 'Q4', 'Q5', 'Q6', 'Q8','Q15', 'Q24', 'Q25', 'Q38']
    return df.melt(id_vars=id_columns, value_vars=my_list)

def crosstable(column1, column2):
    """Cross-tabulate two columns and express every row as proportions.

    Rows are the levels of `column1`, columns the levels of `column2`;
    each row is divided by its own total, so every row sums to 1.
    """
    counts = pd.crosstab(column1, column2)
    row_totals = counts.sum(axis=1)
    return counts.div(row_totals, axis=0)

def bar_plot(data, title, xlabel, ylabel, legend_title):
    """Draw `data` as a grouped bar chart on a new 20x8 figure and show it.

    `title`, `xlabel` and `ylabel` label the axes; `legend_title` heads the legend.
    """
    fig, ax = plt.subplots(figsize=(20,8))
    data.plot(kind='bar', title=title, cmap=plt.get_cmap('brg'), ax=ax)
    ax.set(xlabel=xlabel, ylabel=ylabel)
    ax.legend(title=legend_title)
    plt.show()
   


In [None]:
#WHAT IS THE DISTRIBUTION OF ROLES ACROSS AFRICAN COUNTRIES?
# One row per country; the values are the share of each role within that country.
roles_count = crosstable(african_df['Q3'], african_df['Q5'])

bar_plot(
    roles_count,
    title='DISTRIBUTION OF ROLES ACROSS AFRICAN COUNTRIES',
    xlabel='COUNTRY',
    ylabel='PROPORTION OF ROLES OF DATA SPECIALIST',
    legend_title='ROLES',
)


All the African countries have a spike of students compared to the other roles. This illustrates that more and more African people are considering a career in data science. Are we heading towards a more data-driven Africa? Despite the high participation of Nigeria seen earlier, it is interesting to see that Ghana has taken the lead in individuals who are students. All the other roles are below 20%. Zooming into the different roles, South Africa and Ghana have more Data Scientists, with Morocco, Nigeria and Tunisia almost reaching the same level of representation, and Ghana having the lowest proportion of data scientists. There is very poor representation of Database Engineers in Africa: a few are present in Kenya, Morocco and Nigeria, and none are found in Egypt, Ghana, South Africa and Tunisia.


In [None]:
#WHAT IS THE DISTRIBUTION OF ROLES IN OTHER CONTINENTS COMPARED TO AFRICA?
# One row per role; the values are the share of each continent within that role.
cont_roles = crosstable(df['Q5'], df['continent'])
bar_plot(
    cont_roles,
    title='ROLES OF DATA SCIENCE SPECIALISTS WITHIN THE DIFFERENT CONTINENTS',
    xlabel='DATA SPECIALIST ROLES',
    ylabel='PROPORTION OF CONTINENT',
    legend_title='CONTINENT',
)



In [None]:
#WHICH PROGRAMMING LANGUAGE IS MOSTLY USED IN AFRICA AND BY WHICH GENDER
#A dataframe with programming languages: one row per (respondent, Q7 option) pair
programming_lang = ['Q7_Part_%d' % part for part in range(1, 13)] + ['Q7_OTHER']
africa_pl = africa_melt(programming_lang)

# Number of selections of each language, split by gender and sorted by the 'Man' column first.
lang_by_gender = (
    africa_pl.groupby(['value', 'Q2'])['Q2']
    .count()
    .unstack()
    .sort_values(by=['Man', 'Woman', 'Other'], ascending=False)
)

#plot a stacked bar graph
# A single figsize is used; previously two conflicting sizes were given and (12,5) was the one in effect.
fig, ax = plt.subplots(figsize=(12,5))
title = 'PROGRAMMING LANGUAGES VS GENDER IN AFRICA'
lang_by_gender.plot(kind='barh', stacked=True, title=title, cmap=plt.get_cmap('winter'), ax=ax)
# The bars are horizontal: languages sit on the y axis and counts on the x axis
# (the two axis labels used to be swapped).
ax.set_xlabel('COUNT')
ax.set_ylabel('PROGRAMMING LANGUAGE')
ax.legend(title='GENDER')
plt.show()


Python is the most used programming language in Africa. The top 5 languages are Python, SQL, R, Java and Javascript. As already seen earlier, men predominate, even in the use of the programming languages. There is no language that is used by women more than by men. The least common languages are Julia and Swift.

In [None]:
#WHAT IS THE DISTRIBUTION OF PROGRAMMING LANGUAGES WITHIN AFRICAN COUNTRIES?
# One row per language; the values are the share of each country among that language's users.
lang = crosstable(africa_pl['value'], africa_pl['Q3'])
bar_plot(
    lang,
    title='THE DISTRIBUTION OF PROGRAMMING LANGUAGES WITHIN AFRICAN COUNTRIES',
    xlabel='PROGRAMMING LANGUAGES',
    ylabel='PROPORTION OF COUNTRY',
    legend_title='COUNTRY',
)

The programming languages are used differently by each country. Although Python is the most commonly used language across Africa and Julia the least, the most and least common languages, respectively, within African countries are as follows: Julia and R for Egypt, R and C for Kenya, C and Python for Morocco, Julia and C for Nigeria, Swift and C/MATLAB for South Africa, C and Bash/Other for Tunisia. Ghana always has the lowest proportion for all the programming languages compared to other countries, with Javascript, R and C as the most and least common languages respectively.

In [None]:
#WHAT IS THE DISTRIBUTION OF PROGRAMMING LANGUAGES ACROSS DIFFERENT CONTINENTS?
#a dataframe with programming languages
# One row per continent; the values are the share of each language within that continent.
cont_pl = continent_melt(programming_lang)
cont_lang = crosstable(cont_pl['continent'],cont_pl['value'])
bar_plot(
    cont_lang,
    title=' THE DISTRIBUTION OF PROGRAMMING LANGUAGES ACROSS DIFFERENT CONTINENTS',
    xlabel='CONTINENTS',
    ylabel='PROPORTION OF PROGRAMMING LANGUAGES',
    legend_title='PROGRAMMING LANGUAGE',
)

A similar pattern of usage across the continents is observed for the various programming languages. Python has a spike in all the continents, with SQL always coming second. Surprisingly, Python is used more in Africa than in Asia, Europe and the other continents. Julia and Swift have the lowest proportion of use across all the continents. The top three languages in the studied continents are Python, SQL and R, except in Asia, where R is replaced by Java and is also used less than in the other continents.

In [None]:
#WHICH PROGRAMMING LANGUAGE DO PEOPLE WITH DIFFERENT YEARS OF EXPERIENCE RECOMMEND?
sns.set()

# Share of each recommended language (Q8) within every coding-experience group (Q6); rows sum to 1.
# normalize='index' replaces the helper 'years' column that used to be written into cont_pl.
# Combinations that never occur are now shown as 0 rather than left blank.
table = pd.crosstab(cont_pl['Q6'], cont_pl['Q8'], normalize='index')

# crosstab sorts its rows alphabetically, so the rows themselves are put into chronological order.
# Previously only the tick labels were replaced by a chronological list, which attached the wrong
# experience label to most rows.
# NOTE(review): assumes the shortest bracket is spelled '< 1 years' in the data; levels not
# listed here are appended at the end so that no row is ever dropped.
experience = ['< 1 years', '1-2 years', '3-5 years', '5-10 years', '10-20 years', '20+ years', 'I have never written code']
row_order = [level for level in experience if level in table.index]
row_order += [level for level in table.index if level not in experience]
table = table.reindex(row_order)

fig, ax = plt.subplots(figsize=(12,6))
h = sns.heatmap(table, annot=True, cmap='YlGnBu', cbar=True, linewidth=0.5, ax=ax)
h.set_title('RECOMMENDATION OF PRGRAMMING LANGUAGE BY PEOPLE WITH DIFFERENT YEARS WRITTING CODE')
h.set(ylabel='YEARS WRITTING CODE', xlabel='PROGRAMMING LANGUAGE')
plt.show()

Whatever the number of years spent writing code, over 75% of the people recommend Python. R and SQL again follow Python in recommendations.
Together with the insights drawn from the previous graphs, the heatmap illustrates that an individual is most likely to recommend a programming language they use.

In [None]:
#DO MEN AND WOMEN WITH DIFFERENT YEARS OF EXPERIENCE HAVE THE SAME COMPENSATION?

# Compensation brackets in ascending order. The brackets between 125,000 and 149,999 and between
# 250,000 and 500,000 were missing from the list, so respondents in them were left off the plot.
order_c = ['$0-999','1,000-1,999', '2,000-2,999', '3,000-3,999', '4,000-4,999', '5,000-7,499', '7,500-9,999',
           '10,000-14,999', '15,000-19,999', '20,000-24,999', '25,000-29,999', '30,000-39,999', '40,000-49,999',
           '50,000-59,999', '60,000-69,999', '70,000-79,999', '80,000-89,999', '90,000-99,999', '100,000-124,999',
           '125,000-149,999', '150,000-199,999', '200,000-249,999', '250,000-299,999', '300,000-500,000', '> $500,000']

# Facet rows in chronological order. The list used to be defined but never passed to catplot.
# NOTE(review): assumes the shortest bracket is spelled '< 1 years' in the data; levels not
# listed here are appended after the known ones so that no group is dropped.
experience = ['< 1 years', '1-2 years', '3-5 years', '5-10 years', '10-20 years', '20+ years']
levels_present = set(african_df['Q6'].dropna())
row_order = [level for level in experience if level in levels_present]
row_order += sorted(levels_present.difference(experience))

# african_df has one row per respondent. The melted africa_pl repeats every respondent once per
# Q7 column, which multiplied every count on this plot by the number of Q7 columns.
yearly_c = sns.catplot(x='Q24', hue='Q2', order=order_c, data=african_df, kind='count', row='Q6',
                       row_order=row_order, palette='mako_r', sharex=True, height=3, aspect=10/3)

_ = yearly_c.fig.suptitle('YEARLY COMPENSATION BY YEARS OF EXPERIENCE FOR  DIFFERENT GENDERS', y=1.03)
_ = yearly_c.set(xlabel='YEARLY COMPENSATION', ylabel='COUNT')
_ = plt.xticks(rotation=90)

This graph shows that most people with between <1 year and 5 years of writing code are compensated an amount between $0 and $1,000. The number of both males and females being compensated decreases as the amount of compensation increases. Between <1 year and 5 years of writing code, there is no clear pattern of how experience (i.e. how long you have written code) influences the compensation amount. Looking at those with 10-20 and also 20+ years of experience, there is some evidence that the relationship exists, as there are just a few people in the lower amounts (<= 10,000-15,000); in fact, they start increasing towards higher amounts from 7,500-9,999.

From 1-2 years to 20+ years, there is no woman being compensated above 40,000.

It is important to note that this result is just a count of how much the people who participated were being compensated, and so depends on how many people were in each category of years writing code.

In [None]:
#WHICH PLATFORM DO AFRICAN PEOPLE USE TO COMPLETE DATA SCIENCE COURSES

#a dataframe with platforms where people complete data science courses
courses = ['Q37_Part_%d' % part for part in range(1, 12)] + ['Q37_OTHER']
ds_platform_df = africa_melt(courses)

# One row per country; the values are the share of each platform within that country.
ds_platform = crosstable(ds_platform_df['Q3'], ds_platform_df['value'])

bar_plot(
    ds_platform,
    title='PLATFORMS USED TO COMPLETE DATA SCIENCE COURSES IN AFRICA',
    xlabel='COUNTRY',
    ylabel='PROPORTION OF PLATFORMS PROVIDING DATA SCIENCE COURSES',
    legend_title='PLATFORMS ',
)

In [None]:
# Per-country proportions of the five platforms selected most often across all African respondents.
# nlargest(5, africa) ranked the platforms by the first listed country (Nigeria) and used the
# other countries only to break ties, so it did not give an Africa-wide top five.
top_platforms = ds_platform_df['value'].value_counts().head(5).index
ds_platform.T.loc[top_platforms]

Coursera is the most common platform on which African people complete their Data Science courses. In South Africa, however, Udemy seems to be the most commonly used platform, while in Kenya Kaggle Learn courses are used at the same rate as cloud-certification programs. Fast.ai and cloud-certification programs seem to be the least common platforms, barely reaching a 5% proportion, except in Tunisia, where the proportion of cloud-certification programs is slightly above 5%. The top 5 platforms in Africa are Coursera, Udemy, Kaggle Learn Courses, DataCamp and Udacity.

In [None]:
#WHAT IS THE DISTRIBUTION OF MACHINE LEARNING FRAMEWORKS ACROSS AFRICA

#A dataframe with machine learning frameworks and algorithms
ml_frameworks_list = ['Q16_Part_%d' % part for part in range(1, 16)] + ['Q16_OTHER']
ml_algorithms_list = ['Q17_Part_%d' % part for part in range(1, 12)] + ['Q17_OTHER']

ml_frameworks_df = africa_melt(ml_frameworks_list)
ml_algorithms_df = africa_melt(ml_algorithms_list)
# The former `africa_melt(ml_products_list)` call was removed: `ml_products_list` is never
# defined, so the cell stopped with a NameError on a fresh kernel, and its result was unused.

# `ml_df` is still built because the following cell reads it. It is a many-to-many join on
# demographic columns only (there is no respondent id), so every framework row is repeated once
# for each algorithm row of every respondent sharing the same demographics. It is therefore
# no longer used for the proportions below.
ml_df = ml_frameworks_df.merge(ml_algorithms_df, on = ['Q1', 'Q2', 'Q3', 'Q4', 'Q5', 'Q6', 'Q8','Q15', 'Q24', 'Q25', 'Q38'], suffixes = ['_frame', '_alg'])

# Framework shares per country, taken from the melted framework answers so each selection counts once.
frame_afr = crosstable(ml_frameworks_df['Q3'], ml_frameworks_df['value'])
bar_plot(frame_afr, 'THE DISTRIBUTION OF MACHINE LEARNING FRAMEWORKS ACROSS AFRICA', 'COUNTRY', 'PROPORTION OF MACHINE LEARNING FRAMEWORKS', 'MACHINE LEARNING FRAMEWORKS' )

In [None]:
#WHAT IS THE DISTRIBUTION OF MACHINE LEARNING ALGORITHMS ACROSS AFRICA
# Algorithm shares per country, taken from the melted algorithm answers so each selection counts once.
# The merged `ml_df` repeats every algorithm row once per matching framework row of respondents
# with the same demographics, which weights large demographic groups more heavily.
alg_afr = crosstable(ml_algorithms_df['Q3'], ml_algorithms_df['value'])
bar_plot(alg_afr, 'THE DISTRIBUTION OF MACHINE LEARNING ALGORITHMS ACROSS AFRICA', 'COUNTRY', 'PROPORTION OF MACHINE LEARNING ALGORITHMS', 'ML ALGORITHM')

In [None]:
#WHAT IS THE DISTRIBUTION OF MACHINE LEARNING FRAMEWORKS ACROSS DIFFERENT CONTINENTS?
cont_ml_frame = continent_melt(ml_frameworks_list)
cont_ml_alg = continent_melt(ml_algorithms_list)
# `cont_ml` is still built because the following cell reads it. It is a many-to-many join on
# demographic columns only (there is no respondent id), so rows are repeated for every
# respondent sharing the same demographics. It is therefore no longer used for the proportions below.
cont_ml = cont_ml_frame.merge(cont_ml_alg, on=['Q1', 'Q2', 'Q3','continent', 'Q4', 'Q5', 'Q6', 'Q8','Q15', 'Q24', 'Q25', 'Q38'], suffixes = ['_frame', '_alg'])

# Framework shares per continent, taken from the melted framework answers so each selection counts once.
frame = crosstable(cont_ml_frame['continent'], cont_ml_frame['value'])
bar_plot(frame, 'THE DISTRIBUTION OF MACHINE LEARNING FRAMEWORKS ACROSS DIFFERENT CONTINENTS', 'CONTINENT', 'PROPORTION OF MACHINE LEARNING FRAMEWORKS', 'ML FRAMEWORKS')

In [None]:
#WHAT IS THE DISTRIBUTION OF MACHINE LEARNING ALGORITHMS ACROSS DIFFERENT CONTINENTS?
# Algorithm shares per continent, taken from the melted algorithm answers so each selection counts once.
# The merged `cont_ml` repeats every algorithm row once per matching framework row of respondents
# with the same demographics, which weights large demographic groups more heavily.
frame = crosstable(cont_ml_alg['continent'], cont_ml_alg['value'])
bar_plot(frame, 'THE DISTRIBUTION OF MACHINE LEARNING ALGORITHMS ACROSS DIFFERENT CONTINENTS', 'CONTINENT', 'PROPORTION OF MACHINE LEARNING ALGORITHMS', 'ML ALGORITHM')

Scikit-Learn is commonly used in all the African countries, with TensorFlow coming second and Keras third. A similar pattern is observed for the use of these machine learning frameworks across continents.

The top three machine learning algorithms in Africa are Linear and Logistic Regression, Decision Trees or Random Forests, and Convolutional Neural Networks. In Kenya, however, Bayesian Approaches are used more than Convolutional Neural Networks. Across continents the same trend holds, except that in some continents Generative Adversarial Networks and Bayesian Approaches seem to overtake Convolutional Neural Networks.