# The Status of ML in Developing Economies: the Case of Africa
***
*    [Angeliki Varela](https://www.linkedin.com/in/angeliki-varela-8a365b37) 
    *    email: avarela@chicagobooth.edu
    *    Kaggle: `ang3l1k1`
*    [Orest Xherija](https://www.linkedin.com/orestxherija) 
    *    email: xherija.orest@gmail.com 
    *    Kaggle: `orestxherija`

In [None]:
"""
We create the groups of countries that we wish to study and compare
"""

# Country -> continent label, built from (continent, countries) pairs so that
# each continent's members are listed together.
GROUPS = {
    country: continent
    for continent, countries in (
        ('EUROPE', (
            'United Kingdom of Great Britain and Northern Ireland',
        )),
        ('NORTH_AMERICA', (
            'United States of America',
        )),
        ('ASIA', (
            'India',
            'Japan',
            'China',
        )),
        ('AFRICA', (
            'Nigeria',
            'Morocco',
            'South Africa',
            'Egypt',
            'Tunisia',
            'Kenya',
            'Algeria',
        )),
    )
    for country in countries
}

# Countries treated as the "AI Leaders" reference group.
AI_LEADERS = [
    'China',
    'United States of America',
    'India',
    'Japan',
    'United Kingdom of Great Britain and Northern Ireland',
]

In [None]:
"""
Importing necessary modules
"""

import random
random.seed(2019)
import pandas
import seaborn
seaborn.set_style("darkgrid")
import numpy
import plotly
import plotly.colors

In [None]:
"""
Reading the dataset and preparing it for more convenient data analysis
"""

# `skiprows=[1]` drops the second line of the file (presumably the row that
# holds the full question texts -- TODO confirm against the CSV).
multiple_choice_responses = pandas.read_csv(
    '../input/kaggle-survey-2019/multiple_choice_responses.csv',
    skiprows=[1],
)

# Q4 (highest level of education): answer text -> numeric code.
# -1 marks "prefer not to answer".
Q4_name_map = {
    'Master’s degree': 4,
    'Professional degree': 1,
    'Bachelor’s degree': 3,
    'Some college/university study without earning a bachelor’s degree': 2,
    'Doctoral degree': 6,
    'I prefer not to answer': -1,
    'No formal education past high school': 0,
}

# Q23: experience bracket -> lower bound of the bracket, in years.
Q23_name_map = {
    '< 1 years': 0,
    '1-2 years': 1,
    '2-3 years': 2,
    '3-4 years': 3,
    '4-5 years': 4,
    '5-10 years': 5,
    '10-15 years': 10,
    '20+ years': 20,
}

# Q15: experience bracket -> lower bound of the bracket, in years.
# -1 marks respondents who have never written code.
Q15_name_map = {
    '< 1 years': 0,
    '1-2 years': 1,
    '3-5 years': 3,
    '5-10 years': 5,
    '10-20 years': 10,
    '20+ years': 20,
    'I have never written code': -1,
}

# Q11: spending bracket -> lower bound of the bracket, in USD.
Q11_name_map = {
    '$0 (USD)': 0,
    '$1-$99': 1,
    '$100-$999': 100,
    '$1000-$9,999': 1000,
    '$10,000-$99,999': 10000,
    '> $100,000 ($USD)': 100000,
}

# Q22: frequency bracket -> lower bound of the bracket, in number of times.
Q22_name_map = {
    'Never': 0,
    'Once': 1,
    '2-5 times': 2,
    '6-24 times': 6,
    '> 25 times': 25,
}


# Recode the bracketed survey answers to their numeric codes. `Series.map`
# with a dict turns every value that is not a key of the mapping (including
# missing answers) into NaN.
multiple_choice_responses['Q4'] = multiple_choice_responses['Q4'].map(Q4_name_map)
multiple_choice_responses['Q23'] = multiple_choice_responses['Q23'].map(Q23_name_map)
multiple_choice_responses['Q11'] = multiple_choice_responses['Q11'].map(Q11_name_map)
multiple_choice_responses['Q15'] = multiple_choice_responses['Q15'].map(Q15_name_map)
multiple_choice_responses['Q22'] = multiple_choice_responses['Q22'].map(Q22_name_map)

# Remove respondents whose country is recorded as 'Other': we have no
# information that would let us place them in a continent group.
# `.copy()` detaches the filtered frame from the original so that the column
# assignments below do not write into a slice (SettingWithCopyWarning).
multiple_choice_responses = multiple_choice_responses[multiple_choice_responses['Q3'] != 'Other'].copy()

# Add column to represent the continent to which the participants belong;
# countries that are not listed in `GROUPS` are labelled 'OTHER'.
multiple_choice_responses['continent'] = multiple_choice_responses['Q3'].apply(lambda x: GROUPS.get(x, 'OTHER'))

# Add column corresponding to whether country is among `AI_LEADERS`
multiple_choice_responses['ai_leader'] = multiple_choice_responses['Q3'].apply(lambda x: 'AI_LEADERS' if x in AI_LEADERS else 'NON_AI_LEADERS')

# Respondents from the African countries only.
african_countries = multiple_choice_responses[multiple_choice_responses['continent'] == 'AFRICA'].copy()

# Respondents from either group. In this frame the `ai_leader` column is
# rewritten into a two-valued label: 'AI_LEADERS' or 'AFRICA'.
ai_leaders_africa = multiple_choice_responses[(multiple_choice_responses['ai_leader'] == 'AI_LEADERS') | (multiple_choice_responses['continent'] == 'AFRICA')].copy()
ai_leaders_africa['ai_leader'] = ai_leaders_africa['ai_leader'].apply(lambda x: x if x == 'AI_LEADERS' else 'AFRICA')

In [None]:
"""
We define two functions to help us generate the necessary visualizations
"""

def create_plotly_barchart(
    dfs, 
    strat_cols, 
    quant_col, 
    width=None,
    height=None,
    names_map=None,
    percent=True, 
    xaxis=None,
    yaxis=None,
    legend_orientation='h',
    legend_x=None,
    legend_y=None,
    legend_yanchor='top',
    shared_yaxes=True,
    rows=1,
    cols=1,
    specs=None,
    subplot_titles=None,
    title=None,
    xaxis_titles=None,
    yaxis_titles=None,
    vertical_spacing=None,
    horizontal_spacing=None):
    """Draw one bar-chart subplot per data frame and return the figure.

    For each frame ``dfs[i]`` the values of ``quant_col`` are cross-tabulated
    against ``strat_cols[i]``; every distinct ``quant_col`` value becomes one
    bar trace whose x positions are the distinct ``strat_cols[i]`` values.
    Subplots are filled row by row. Only the traces of the first subplot get
    a legend entry; traces at the same position in later subplots share its
    legend group.

    Parameters
    ----------
    dfs : sequence of pandas.DataFrame
        One frame per subplot.
    strat_cols : sequence of str
        Column of the matching frame whose values are placed on the x axis.
    quant_col : str
        Column (present in every frame) whose values define the bar traces.
    width, height : int, optional
        Figure size forwarded to the layout.
    names_map : mapping, optional
        Maps ``quant_col`` values to legend names. Values missing from the
        mapping fall back to ``str(value)``.
    percent : bool
        If true, each cross-tabulation row is divided by its row total.
    xaxis, yaxis : sequence, optional
        Axis range applied to every subplot.
    legend_orientation, legend_x, legend_y, legend_yanchor
        Legend settings forwarded to the layout.
    shared_yaxes, rows, cols, specs, subplot_titles, vertical_spacing,
    horizontal_spacing
        Forwarded to ``plotly.subplots.make_subplots``.
    title : str, optional
        Figure title.
    xaxis_titles, yaxis_titles : sequence, optional
        Per-subplot axis titles; entries may be ``None``.

    Returns
    -------
    The plotly figure holding all subplots.

    Raises
    ------
    ValueError
        If the per-subplot arguments do not all have ``len(dfs)`` entries, or
        if the ``rows`` x ``cols`` grid has fewer cells than ``dfs``.
    """
    n_plots = len(dfs)
    if n_plots != len(strat_cols):
        raise ValueError(
            f'`dfs` has {n_plots} entries but `strat_cols` has {len(strat_cols)}')
    for arg_name, arg in (
            ('subplot_titles', subplot_titles),
            ('xaxis_titles', xaxis_titles),
            ('yaxis_titles', yaxis_titles)):
        if arg and len(arg) != n_plots:
            raise ValueError(
                f'`dfs` has {n_plots} entries but `{arg_name}` has {len(arg)}')
    if n_plots > rows * cols:
        raise ValueError(
            f'a {rows}x{cols} grid cannot hold {n_plots} subplots')

    # `import plotly` alone is not guaranteed to load the `subplots`
    # submodule, so bring the factory into scope explicitly.
    from plotly.subplots import make_subplots

    # Extend the default palette with the (shuffled) colours of the built-in
    # colour scales. The palette is assembled in a NEW list: extending
    # `DEFAULT_PLOTLY_COLORS` in place would grow plotly's module-level list
    # on every call.
    scale_colors = [
        stop[1]
        for scale in plotly.colors.PLOTLY_SCALES.values()
        for stop in scale
    ]
    random.shuffle(scale_colors)
    colors = list(plotly.colors.DEFAULT_PLOTLY_COLORS) + scale_colors

    fig = make_subplots(
        shared_yaxes=shared_yaxes,
        rows=rows,
        cols=cols,
        specs=specs,
        subplot_titles=subplot_titles or None,
        vertical_spacing=vertical_spacing,
        horizontal_spacing=horizontal_spacing,
    )

    # (row, col) pair of each subplot, in row-major order
    coordinates = [
        (r, c)
        for r in range(1, rows + 1)
        for c in range(1, cols + 1)
    ]

    for i, (df, strat_col) in enumerate(zip(dfs, strat_cols)):
        counts = pandas.crosstab(df[strat_col], df[quant_col])
        if percent:
            # turn the counts of each row into shares of that row's total
            counts = counts.div(counts.sum(axis=1), axis=0)

        r, c = coordinates[i]
        for j, category in enumerate(counts.columns):
            if names_map:
                trace_name = names_map.get(category, str(category))
            else:
                trace_name = str(category)

            fig.add_trace(
                plotly.graph_objs.Bar(
                    name=trace_name,
                    x=counts.index,
                    y=counts[category],
                    # wrap around rather than fail if the palette runs out
                    marker_color=colors[j % len(colors)],
                    legendgroup=f'group-{j}',
                    showlegend=(i == 0),
                ),
                row=r,
                col=c)

        # name the axes of this subplot (once per subplot, not per trace)
        if xaxis_titles:
            fig.update_xaxes(title_text=xaxis_titles[i], row=r, col=c)
        if yaxis_titles:
            fig.update_yaxes(title_text=yaxis_titles[i], row=r, col=c)

        # impose range limit on axes
        if xaxis:
            fig.update_xaxes(range=xaxis, row=r, col=c)
        if yaxis:
            fig.update_yaxes(range=yaxis, row=r, col=c)

    # add figure title
    if title:
        fig.update_layout(title_text=title)

    # adjust margins and legend settings
    fig.update_layout(
        legend_orientation=legend_orientation,
        legend_x=legend_x,
        legend_y=legend_y,
        legend_yanchor=legend_yanchor,
        width=width,
        height=height,
        margin=plotly.graph_objs.layout.Margin(
            l=25,
            r=10,
            b=50,
            t=50,
            pad=2
        ))
    return fig

def create_polar_multipart(
    df, 
    df_cols, 
    x_names, 
    strat_col, 
    legend_orientation='h',
    legend_x=None,
    legend_y=None,
    title='Plot title',
    xaxis_title='x axis title',
    yaxis_title='y axis title'):
    """Draw a filled polar chart of answer rates, one trace per group.

    A cell of ``df[df_cols]`` counts as "answered" when it is not null. For
    every distinct value of ``df[strat_col]`` the function computes, per
    column, the share of that group's rows that are answered and plots those
    shares against ``x_names`` on the angular axis.

    Parameters
    ----------
    df : pandas.DataFrame
        Source data.
    df_cols : sequence of str
        Columns whose non-null share is plotted.
    x_names : sequence of str
        Display name of each entry of ``df_cols`` (same length, same order).
    strat_col : str
        Column whose distinct values define the traces. Rows with a missing
        value in this column belong to no group and are left out.
    legend_orientation, legend_x, legend_y
        Legend settings forwarded to the layout.
    title : str
        Figure title.
    xaxis_title, yaxis_title : str
        Accepted for interface compatibility; not used by the function.

    Returns
    -------
    The plotly figure.

    Raises
    ------
    ValueError
        If ``df_cols`` and ``x_names`` differ in length.
    """
    if len(df_cols) != len(x_names):
        raise ValueError(
            f'`df_cols` has {len(df_cols)} entries but `x_names` has {len(x_names)}')

    # boolean frame: True where the respondent gave an answer
    answered = df[df_cols].notnull()
    answered.columns = x_names
    answered[strat_col] = df[strat_col].copy()

    # Computed once. A missing label never compares equal to itself, so its
    # group would be empty and its shares a 0/0 division -- skip it.
    groups = answered[strat_col].dropna().unique()

    fig = plotly.graph_objs.Figure()

    for group in groups:
        members = answered[answered[strat_col] == group]
        fig.add_trace(plotly.graph_objs.Scatterpolar(
            theta=x_names,
            r=[members[name].sum() / members.shape[0] for name in x_names],
            name=f'{group}',
            fill='toself'
        ))

    # add plot title
    fig.update_layout(
        title={
            'text' : title,
            'y' : 0.87,
            'x' : 0.4,
            'xanchor' : 'center',
            'yanchor' : 'top'
        }
    )

    # adjust margins and legend settings
    fig.update_layout(
        legend_orientation=legend_orientation,
        legend_x=legend_x,
        legend_y=legend_y,
        margin=plotly.graph_objs.layout.Margin(
            l=25,
            r=10,
            b=50,
            t=50,
            pad=2
        ))

    return fig

## Table of Contents
***

* [Introduction](#introduction)
    * [Why Africa](#why-africa)
    * [Methodology](#methodology)
        * [How We Identified the AI Leaders](#identify-leaders)
        * [Participating African Countries](#african-countries)
* [Data Analysis](#data-analysis)
    * [Demographics](#demographics)
        * [Age](#age)
        * [Gender](#gender)
        * [Education](#education)
    * [Developing Experience](#experience)
        * [Coding](#coding-years)
        * [Machine Learning](#ml-years)
    * [Sources of Education/Information](#info-edu)
        * [Media Platforms](#fav-media)
        * [Education Platforms](#edu-platforms)
    * [The Status of Machine Learning](#ml-status)
        * [Algorithms](#algos)
        * [Software](#ml-framework)
        * [Hardware](#hardware)
        * [Expenditure](#ml-expenditure)
* [Limitations](#limitations)
* [Conclusions and Recommendations](#conclusions)
* [References](#references)

## Introduction <a id="introduction"></a>
***

<div class="alert alert-success"> 
Artificial intelligence (AI) carries a promise of economic development through skill-based technological change whose impact is particularly significant for emerging economies (<b>Strusani & Houngbonon 2019</b>). Its impact is expected to contribute in bridging the gap between "developed" and "developing" countries by triggering innovations in products and services, boosting local entrepreneurship, ensuring better education outcomes and reducing income inequality. 
</div>

<div style="text-align:justify">
Our notebook focuses on Africa, a continent that has been historically considered "under-developed" in terms of economic growth but which has nevertheless risen recently to the status of the second fastest-growing region of the world (<b>Sayeh 2013</b>). In particular, we explore and analyze the responses of African Kagglers in the 2019 Kaggle ML & DS Survey, to identify machine learning (ML) patterns and educational outcomes in Africa. Following analysis of the results and evaluation of such patterns and outcomes, we conclude by proposing policy recommendations for the improvement of machine learning education and other data-driven learning initiatives in the continent.
</div>

### Why Africa <a id="why-africa"></a>
***


> Africa is Tech's next great frontier (**Kahn 2019**)

<div style="text-align:justify">
Africa constitutes an interesting case-study when it comes to embracing ML and AI initiatives. On the one hand, the continent's high economic growth rate has led some experts to speculate that Africa may be "Tech's next great frontier" (<b>Kahn 2019</b>) while the continent's relatively young and increasingly educated population is a substantial pool of potential talent (<b>Hao 2019</b>). On the other hand, as the majority of the continent's countries still fall under "least developed country" (LDC) status (see UNCTAD map below), the blossoming of ML in Africa presents tangible opportunities for improving the lives of millions (<b>Cisse 2018</b>). However, education (<b>Entrepreneur 2019</b>) is central in all efforts enabling Africa to reap the benefits of ML and AI and become an active participant in the world ML/AI community.
</div>    

![](https://unctad.org/en/PublishingImages/aldc_LDCs_map_large.jpg)

<div style="text-align:justify">
Indeed, in recent years, Africa has become the epicenter of numerous exciting initiatives aiming to broaden participation in the study of ML and related disciplines, increase diversity in both academic and non-academic conferences and, generally, democratize AI (<b>Maskey 2018</b>). In particular:
</div>

*  **Tech leaders are actively investing in Africa**. In 2018, Google [announced](https://www.blog.google/around-the-globe/google-africa/google-ai-ghana/) the opening of a Google AI research center in Accra, Ghana. The center's purpose is to focus on advancing our understanding of ML while, at the same time, developing practical ML applications addressing challenges in key sectors, such as healthcare, education, and agriculture. Likewise, [IBM Research–Africa](http://www.research.ibm.com/labs/africa/) is IBM’s 12th global research lab and the first industrial research facility in the continent. The company opened its first African office in Nairobi, Kenya in 2013, followed by a second one in Johannesburg, South Africa in 2016.

* **Major academic conferences move to Africa while AI learning initiatives are blossoming throughout the continent**. The [announcement](https://iclr.cc/Conferences/2020/CallForPapers) that the eighth International Conference on Learning Representations (ICLR 2020), one of the premier conferences in ML and AI, will take place in Addis Ababa, Ethiopia, received a lot of praise as a step towards fostering a more inclusive research environment. However, while the ICLR 2020 is widely regarded as the first major AI research conference that will take place in the African continent, notable AI learning initiatives have been paving the way for more than half a decade. The [Data Science Africa (DSA)](https://www.datascienceafrica.org/) annual summer schools and workshops, launched in 2013, have trained participants on ML and related methods and provided an avenue for researchers to present relevant work in the African context. Similarly, the [Deep Learning Indaba](http://www.deeplearningindaba.com/), an annual meeting of the African Machine Learning community that began in 2017, has been featuring keynotes and tutorials by leading figures in the field, such as [Nando de Freitas](https://www.cs.ubc.ca/~nando/), [Jeff Dean](https://ai.google/research/people/jeff/), [Kyunghyun Cho](http://www.kyunghyuncho.me/) and [Shakir Mohamed](https://shakirm.com/).

![](https://www.microsoft.com/en-us/research/uploads/prod/2018/09/Deep-Learning-Indaba-2017-group-1024x683.jpg)

<div style="text-align:justify">
In our view, the above developments suggest that the ML community in Africa has gathered great momentum, which we hope will keep rising in the future. Our exploration of the 2019 Kaggle ML & DS Survey attempts to assess the current status of AI in Africa, identify key drivers and dynamics behind ML adoption in the continent and suggest ways in which the relevant efforts can be accelerated.
</div>

### Methodology <a id="methodology"></a>
***

<div style="text-align:justify">
To assess the status of AI in Africa, we compared the African countries which participated in the survey (<b>Algeria, Egypt, Kenya, Morocco, Nigeria, South Africa and Tunisia</b>) with certain countries that are universally accepted as leading the AI era (<b>China, India, Japan, United Kingdom and United States</b>), which we term "AI Leaders". Our motivation was to gauge whether African countries are on par with the AI Leaders or whether there are still significant gaps that need to be bridged.
</div>    



#### How we identified the AI Leaders <a id="identify-leaders"></a>

> China and the United States are often seen as a "global economic duopoly" in AI, with the former leading the industrialization of AI and the latter dominating its discovery (**Araya 2019**)

<div style="text-align:justify"> For the purposes of our analysis, we have created an index of the top 5 countries influencing the global AI community (AI Leaders) based on the following three criteria: </div>

*	Their world share of AI investment and financing from 2013 to 2018 (**Holst 2019**);
*	The number of local job listings regarding AI professional opportunities in those countries (**Reints 2018**); and
*	The number of citable academic [publications](https://www.scimagojr.com/countryrank.php?category=1702&year=2018) in such countries in 2018 (without regard to the influence of such publications, which can be subjective).

<div style="text-align:justify">
Our list is led by China and the United States, which are often seen as the "global economic duopoly" (<b>Araya 2019</b>) in AI, with the former leading the industrialization of AI and the latter dominating its discovery. Completing our "top five" index are India, the United Kingdom and Japan. One should note that, as is the case with all similar metrics, the composition of our index incorporates some arbitrariness. Other countries (such as Germany, Canada, France, Israel and Australia) have also been paramount in driving AI adoption worldwide and leading relevant initiatives; after all, there is no single recipe for the development of strategic advantage in AI (<b>Louks et al. 2019</b>). However, we feel that the identification of the above countries as AI Leaders is a fairly accurate representation of the major players in the world’s AI community and does not compromise or otherwise affect the results of our analysis.
</div>

<div style="text-align:justify">
The following table includes a breakdown of the number of respondents per country in the AI Leaders group: </div>

In [None]:
# Row count per country (`Q3`) within the AI Leaders group, largest first.
grouped_ai_leaders = (
    ai_leaders_africa
    .loc[ai_leaders_africa['ai_leader'] == 'AI_LEADERS']
    .groupby('Q3')['ai_leader']
    .count()
    .sort_values(ascending=False)
)

# Render the counts as a two-column table.
fig = plotly.graph_objs.Figure(
    data=[
        plotly.graph_objs.Table(
            header={'values': ['Countries', 'Number of Respondents']},
            cells={'values': [grouped_ai_leaders.index, grouped_ai_leaders.values]},
        )
    ]
)

# Fixed-size figure without any margin around the table.
fig.update_layout(
    autosize=False,
    width=700,
    height=150,
    margin=plotly.graph_objs.layout.Margin(l=0, r=0, b=0, t=0, pad=0),
)

fig.show()

#### Participating African Countries <a id="african-countries"></a>

<div style="text-align:justify"> Seven African countries participated in the 2019 Kaggle ML & DS Survey: Algeria, Egypt, Kenya, Morocco, Nigeria, South Africa and Tunisia. The following table includes a breakdown of the number of respondents per country in the Africa group: </div>

In [None]:
# Row count per African country (`Q3`), largest first.
grouped_african = (
    african_countries
    .groupby('Q3')['ai_leader']
    .count()
    .sort_values(ascending=False)
)

# Render the counts as a two-column table.
fig = plotly.graph_objs.Figure(
    data=[
        plotly.graph_objs.Table(
            header={'values': ['Countries', 'Number of Respondents']},
            cells={'values': [grouped_african.index, grouped_african.values]},
        )
    ]
)

# Fixed-size figure without margins, on a light steel blue background.
fig.update_layout(
    autosize=False,
    width=700,
    height=169,
    margin=plotly.graph_objs.layout.Margin(l=0, r=0, b=0, t=0, pad=0),
    paper_bgcolor="LightSteelBlue",
)

fig.show()

<div style="text-align:justify"> It is noted that the above countries are very diverse in terms of geography, socio-economic level, ethnicity and culture. For these reasons, we have taken a deeper dive at the country level in those instances where we considered that a cross-country analysis would be more appropriate.

We also note that none of the above countries qualify as LDCs (as per the UNCTAD map above). Nevertheless, we consider that the analysis of ML adoption trends has substantial merit: given their relative economic maturity, such countries are more likely to be considered “early adopters” that could pave the way for the faster adoption of ML and AI in the entire African continent. </div>

## Data Analysis <a id="data-analysis"></a>
***

### Demographics <a id="demographics"></a>
***

#### Age (Distribution) <a id="age"></a>

> **Question 1**: What is your age (# years)?

In [None]:
# Age (Q1): two side-by-side subplots -- the AI Leaders vs. Africa frame
# stratified by `ai_leader`, and the African frame stratified by country (`Q3`).
parameters = dict(
    dfs=[ai_leaders_africa, african_countries],
    strat_cols=['ai_leader', 'Q3'],
    quant_col='Q1',
    title=None,
    xaxis_titles=[None, None],
    yaxis_titles=['Percentage of respondents', None],
    rows=1,
    cols=2,
    horizontal_spacing=0.02,
    yaxis=[0, 1.02],
    legend_y=-0.15,
    legend_orientation='h',
)

fig = create_plotly_barchart(**parameters)
fig.show()

The most notable differences between Africa and the AI Leaders are the following:
* African countries have a larger proportion of "young coders" compared to the AI Leaders. In particular `68.3%` of African coders fall within the first three age bins (18-21, 22-24 and 25-29) against `60.2%` of the coders from the AI Leaders.
* African countries dominate the 22-24 and 25-29 age bins (i.e., coders in their twenties) with `56%` of the African respondents to the survey falling in either bin, as opposed to `41.7%` of the AI Leaders. On the other hand, AI Leaders have a larger proportion of Kagglers in the 18-21 age bin (`18.5%`), as compared to African countries (`12.3%`).
* Africa also has fewer coders aged 40 and above (`9%`) compared to the AI Leaders (`16.7%`).
* The above comparison suggests that the depiction of Africa in the media as a source of "young and enthusiastic talent" may indeed have merit.

Age distribution by country across African countries varies greatly. However, we can distinguish between three main broad patterns:
* In Tunisia, Egypt and Morocco, a high proportion of the respondents falls within the 22-24 age bin ("early twenties"), followed by a likewise relatively high proportion of the population in the 25-29 age bin ("late twenties"). Tunisia is notable insofar as `52.9%` of the Kagglers who replied to the survey are in their early twenties. This pattern suggests that the coders in these countries are expected to have typically completed their college education and be in a position to currently join the workforce. On the other hand, these countries have a lower proportion in the "college age" age bin of 18-21.
* South Africa and Algeria follow a somewhat similar pattern, with the difference that the largest share of their population that answered the Kaggle questionnaire falls in the "late twenties" bin and the second largest falls into the "early twenties" one. Again, the proportion of the "college age" coders is relatively low (i.e., below `10%`). These countries also have significant numbers of coders in their "early thirties": `22.5%` of South Africa's coders and `19%` of Algeria's fall in the respective bin (30-34). As the workforce of these age categories can be expected to be more experienced professionally, this could suggest that these countries are good talent pools for large-scale ML and AI initiatives or managerial projects.
* The other two countries, Kenya and Nigeria, are closer to the aggregate trend for African countries, insofar as the majority of their population is concentrated roughly equally in the first three bins (i.e., "college age", "early twenties" and "late twenties.")

#### Gender (Distribution) <a id="gender"></a>

> **Question 2**: What is your gender?

In [None]:
# Gender (Q2): two side-by-side subplots -- the AI Leaders vs. Africa frame
# stratified by `ai_leader`, and the African frame stratified by country (`Q3`).
parameters = dict(
    dfs=[ai_leaders_africa, african_countries],
    strat_cols=['ai_leader', 'Q3'],
    quant_col='Q2',
    title=None,
    xaxis_titles=[None, None],
    yaxis_titles=['Percentage of respondents', None],
    rows=1,
    cols=2,
    horizontal_spacing=0.02,
    yaxis=[0, 1.02],
    legend_y=-0.15,
)

fig = create_plotly_barchart(**parameters)
fig.show()

Gender distribution is very similar between African countries and the AI Leaders, even though we observe a somewhat higher percentage of female coders among African Kagglers (`21%`) as compared to Kagglers from the AI Leaders (`16.8%`).

We observe greater heterogeneity in gender distribution across African countries. In particular:
* Tunisia stands out as a notable case of "gender parity" among coders: `51.5%` of the country's surveyed Kagglers are male, while `48.5%` are female. It should be noted that the number of Tunisian Kagglers who took the survey (`68`) can be seen as being in line with that of other northern African countries (see also above).
* Other than Tunisia, the country with the largest proportion of female Kagglers is Kenya (`25.4%`).
* The countries with the least representation of female coders in Kaggle are South Africa (`15.8%`) and Nigeria (`15.9%`). Assuming that participation in Kaggle is indicative of coding activity generally, such proportions may be an indication that targeted initiatives could be offered in those countries, to stimulate the interest of the female population in coding.

#### Education (Distribution) <a id="education"></a>

> **Question 4**: What is the highest level of formal education that you have attained or plan to attain within the next 2 years?

In [None]:
# Short legend label for each numeric education code stored in `Q4`.
invert_Q4_name_map = {
    4: "Master's",
    1: "Professional",
    3: "Bachelor's",
    2: "Some uni courses",
    6: "Doctoral",
    -1: "No answer",
    0: "High school",
}

# Education (Q4): two side-by-side subplots -- the AI Leaders vs. Africa frame
# stratified by `ai_leader`, and the African frame stratified by country (`Q3`).
parameters = dict(
    dfs=[ai_leaders_africa, african_countries],
    strat_cols=['ai_leader', 'Q3'],
    quant_col='Q4',
    title=None,
    xaxis_titles=[None, None],
    yaxis_titles=['Percentage of respondents', None],
    rows=1,
    cols=2,
    horizontal_spacing=0.02,
    names_map=invert_Q4_name_map,
    yaxis=[0, 1.02],
    legend_y=-0.15,
)

fig = create_plotly_barchart(**parameters)
fig.show()

The distributions of the population between Africa and the AI Leaders are very comparable. The main difference has to do with the proportion of bachelor’s versus master’s degree holders. In Africa `41.2%` of the respondents reported a bachelor’s degree as their highest education attained while `36.8%` are master’s degree holders. The pattern is inverted in AI Leaders where `37.2%` report a bachelor’s degree as their highest degree while `42.2%` are master’s degree holders.

Given that, as seen above, there are more Kagglers from AI Leaders in the "college age" age bin than African Kagglers, this may suggest that fewer African Kagglers "invest" in a master’s level postgraduate education. It is nevertheless noted that the proportion of respondents with a doctoral degree is similar across the two groups (`10.7%` in Africa v. `11.5%` in AI Leaders).

Across African countries, we observe greater variation, with the following two patterns emerging:
* Algeria, Morocco and Tunisia have a higher proportion of graduate degree holders. These countries have a typically high proportion of master’s degree holders (`42.9%` in Algeria, `53.3%` in Morocco and `50%` in Tunisia), followed by a relatively high proportion of doctoral degree holders (`28.6%` in Algeria, `26.7%` in Morocco and `15.2%` in Tunisia).
* The remaining countries (Egypt, Kenya, Nigeria and South Africa) follow the general trend for African countries and are typified by a population whose largest proportion holds a bachelor’s degree while the second largest proportion holds a master’s degree. The proportion of bachelor’s degree holders is notably high in Egypt (`58.7%`) and Kenya (`57.5%`). On the other hand, the divide between bachelor’s and master’s degree holders is less pronounced in Nigeria and South Africa: in the first country we observe `50.5%` bachelor’s holders v. `37%` master’s holders while in the second, the respective percentages are `37.8%` and `32.8%`.

### Developing Experience <a id="experience"></a>
***

#### Coding (Distribution) <a id="coding-years"></a>

> **Question 15**: How long have you been writing code to analyze data (at work or at school)?

In [None]:
# Coding experience (Q15): two side-by-side subplots -- the AI Leaders vs.
# Africa frame stratified by `ai_leader`, and the African frame stratified by
# country (`Q3`). The legend shows the original answer text, obtained by
# inverting `Q15_name_map` (numeric code -> answer text).
parameters = dict(
    dfs=[ai_leaders_africa, african_countries],
    strat_cols=['ai_leader', 'Q3'],
    quant_col='Q15',
    title=None,
    xaxis_titles=[None, None],
    yaxis_titles=['Percentage of respondents', None],
    rows=1,
    cols=2,
    horizontal_spacing=0.02,
    names_map={code: answer for answer, code in Q15_name_map.items()},
    yaxis=[0, 1.02],
    legend_y=-0.15,
)

fig = create_plotly_barchart(**parameters)
fig.show()

In comparing the distribution of years of coding experience among the African countries and the AI Leaders the differences are significant. The overall takeaway is that, on average, African coders seem to be relatively newer to the coding game, given that:

*	The largest proportion of African Kagglers (`37.4%`) has been coding for less than a year and the overall proportion of those who have been coding for less than two years is `66.3%`. By comparison, even though "less than a year" and "one to two years" likewise gather the most responses from AI Leaders, the overall proportion of users from these countries that have been coding for less than two years is `52.0%`.
*	The proportion of Kagglers who have been coding between three and five years in the AI Leaders is `20.9%` compared to `14%` for Africa. Similarly, the proportion of Kagglers who have been coding between five and ten years in the AI Leaders is `11.2%` compared to `5.6%` for Africa.
*	At the extreme ends of the experience scale, `10%` of Kagglers in AI Leaders have been coding for ten years or more compared to `3%` of Kagglers in Africa. Conversely, `10.9%` of Kagglers in Africa have never coded before, compared to `5.9%` in AI Leaders.

Some heterogeneity can be nevertheless observed across African countries. In particular:

* The distribution of coding experience is very similar in Egypt, Kenya and Morocco. In these countries the majority of respondents (`62.6%` in Egypt, `70%` in Kenya and `63.4%` in Morocco) report up to 2 years of experience while their distribution is roughly equal in the "up to one year" and "1-2 years" bins. The proportion of Kagglers falling in the "3-5 years" bin is `22.9%` in Egypt, `14%` in Kenya and `15.2%` in Morocco. 
* Algeria, Nigeria and Tunisia are similar to the above sub-group in so far as the majority of respondents (`61.5%` in Algeria, `75.8%` in Nigeria and `70.5%` in Tunisia) report up to 2 years of coding experience. However, the distributions at the bin level are not equal, with Tunisia having more respondents in the "1-2 years" bin (`45.5%`) and Algeria and Nigeria more in the "up to one year" one (`20.5%` and `27.5%` respectively). The three countries also report the three highest proportions of Kagglers that have "never written code" (`10.3%` in Algeria, `13.7%` in Nigeria and `15.9%` in Tunisia).
* South Africa has a unique distribution and stands out as its coders report more years of experience. The greatest proportion of the population (`26.5%`) has between 3 and 5 years of coding experience, while the second greatest (`22.5%`) has between 1 and 2. South Africa also boasts a notable number of Kagglers with more than five years of coding experience (`29.4%`). The relative maturity of data science in South Africa could be partly explained by the fact that the country is among the continent's most advanced economies and, as a result, has been at the forefront of data science in the region.

#### Machine Learning (Distribution) <a id="ml-years"></a>

> **Question 23**: For how many years have you used machine learning methods?

In [None]:
# Machine learning experience (Q23): two side-by-side subplots -- the AI
# Leaders vs. Africa frame stratified by `ai_leader`, and the African frame
# stratified by country (`Q3`). The legend shows the original answer text,
# obtained by inverting `Q23_name_map` (numeric code -> answer text).
parameters = dict(
    dfs=[ai_leaders_africa, african_countries],
    strat_cols=['ai_leader', 'Q3'],
    quant_col='Q23',
    title=None,
    xaxis_titles=[None, None],
    yaxis_titles=['Percentage of respondents', None],
    rows=1,
    cols=2,
    horizontal_spacing=0.02,
    names_map={code: answer for answer, code in Q23_name_map.items()},
    yaxis=[0, 1.02],
    legend_y=-0.15,
)

fig = create_plotly_barchart(**parameters)
fig.show()

This graph, which illustrates the distribution of years of ML experience across Africa and AI Leaders, follows (as would be expected) the trend identified for coding experience, with most African Kagglers reporting that they have only recently turned to ML. In particular:
* `53.9%` of African Kagglers report having ML experience of less than 1 year, compared to `37.7%` of Kagglers from AI Leaders.
* `4.1%` of African Kagglers report having more than 5 years of ML experience, compared to `9.2%` of Kagglers from AI Leaders. No African Kagglers reported having more than 20 years of ML experience.

We also observe the following patterns across countries:
* Algeria, Kenya and Nigeria drive the trend for African countries insofar as there is a large divide between those with less than one year of ML experience and those with ML experience between one and two years. For example, `56.2%` of respondents in Algeria reported having less than one year of ML experience v. `18.8%` who reported having between one and two years. For Kenya and Nigeria, the same percentages are `56.8%` v. `32.9%` and `67.8%` v. `22%` respectively. 
* The divide seems less pronounced in Egypt, Tunisia and Morocco – the latter country is the only one where the percentage of respondents with ML experience between one and two years exceeds that of respondents with ML experience below one year. More specifically, `41.3%` of the respondents in Egypt reported ML experience below one year against `36.7%` with ML experience between one and two years. The same numbers were `35.6%` (v. `39.2%`) for Morocco and `44.1%` (v. `32.3%`) for Tunisia. 
* We note that Egypt and Morocco also have a noteworthy proportion of Kagglers (i.e., above `10%`) with ML experience between two and three years. 
* South Africa is again unique in terms of its distribution. Overall, it follows the general African trend of a relatively large divide between those with less than one year of ML experience and those with ML experience between one and two years. However, among all African countries, South Africa has the lowest percentage of Kagglers with less than one year of ML experience (`37.6%`) while a large part of its population falls in the bins pertaining to ML experience of two years and above.  Notably, `10.7%` of South African respondents reported 5-10 years of relevant experience while the percentage of those with between two and three years is `16.1%`.



### Sources of Information/Education <a id="info-edu"></a>
***

#### Media Platforms (Popularity) <a id="fav-media"></a>

> **Question 12**: Who/what are your favorite media sources that report on data science topics? (Select all that apply)

In [None]:
# Question 12 is select-all-that-apply: answer option k is stored in the
# column 'Q12_Part_k'. Only parts 1 through 10 are used here.
media_platforms_cols = ['Q12_Part_{}'.format(part) for part in range(1, 11)]

# Short display names, listed in the same order as the columns above
# (presumably matched positionally by the plotting helper).
media_platforms_names = [
    'Twitter',
    'Hacker News',
    'Reddit',
    'Kaggle',
    'Course Forums',
    'Youtube',
    'Podcasts',
    'Blogs',
    'Journals',
    'Slack',
]

parameters = dict(
    df=ai_leaders_africa,
    strat_col='ai_leader',
    df_cols=media_platforms_cols,
    x_names=media_platforms_names,
    title=None,
    xaxis_title=None,
    legend_x=0.35,
)

fig = create_polar_multipart(**parameters)
fig.show()

This barplot, which compares the relative popularity of DS media and information platforms across African countries and the AI Leaders, reveals several notable trends. Among others, we observe that:
* Kaggle, Blogs and Youtube are the most popular media platforms across both groups (in the same order) through which aspiring data scientists get informed on the most recent developments in DS and ML.
* African Kagglers seem to be somewhat more enthusiastic users of Kaggle (`60.4%` of respondents use the platform v. `55.9%` of responding Kagglers from the AI Leaders) and Youtube (`42.9%` of African Kagglers selected the platform, compared to `38.51%` of Kagglers from the AI Leaders). On the other hand, Kagglers from AI Leaders seem to be more enthusiastic blog readers, as `51.9%` of them read blogs regularly, compared to `43.3%` of African Kagglers.
* The ranking of preferences with respect to the remaining media follows a similar pattern; however, we can detect differences within such pattern that are indicative of preferences. For example, Hacker News and Podcasts are the least popular media according to the results of the Kaggle Survey across both categories. However, the AI Leaders prefer Hacker News to Podcasts while the pattern in Africa is reversed.
* Twitter (`26.4%`) is the fourth most popular DS media platform in Africa. Journals, which are the fourth most popular means for AI Leaders (`21%`), rank sixth in Africa (`17%`).

#### Education Platforms (Popularity) <a id="edu-platforms"></a>

> **Question 13**: On which platforms have you begun or completed data science courses? (Select all that apply)

In [None]:
# Question 13 is select-all-that-apply: answer option k is stored in the
# column 'Q13_Part_k'. Only parts 1 through 10 are used here.
edu_platforms_cols = ['Q13_Part_{}'.format(part) for part in range(1, 11)]

# Display names, listed in the same order as the columns above
# (presumably matched positionally by the plotting helper).
edu_platforms_names = [
    'Udacity',
    'Coursera',
    'edX',
    'DataCamp',
    'DataQuest',
    'Kaggle Courses',
    'Fast.ai',
    'Udemy',
    'LinkedIn Learning',
    'University Courses (resulting in degree)',
]

parameters = dict(
    df=ai_leaders_africa,
    strat_col='ai_leader',
    df_cols=edu_platforms_cols,
    x_names=edu_platforms_names,
    title=None,
    xaxis_title=None,
    legend_x=0.35,
)

fig = create_polar_multipart(**parameters)
fig.show()

Africa does not “lag behind” in the use of DS learning tools; if anything, it appears to be an enthusiastic participant in the DS learning community. The key takeaways from this barplot, which reflects the relative popularity of DS learning platforms across African countries and the AI Leaders, are the following:
* Coursera and Kaggle are the most popular learning platforms across both groups. Coursera enjoys higher relative popularity among Kagglers from the AI Leaders (`43.7%`) than Kagglers from Africa (`35%`) while Kaggle reverses this trend (`30.7%` relative popularity in Africa and `25.1%` relative popularity in AI Leaders).
* Overall, African Kagglers seem to be more enthusiastic users of Data Camp, Udemy, edX and Udacity, compared to Kagglers from the AI Leaders.
* The third most popular DS learning platform in Africa is Data Camp (`26.7%`). The third most popular DS learning platform in AI Leaders is Udemy (`25%`).
* University courses are the fourth most popular learning platform in AI Leaders (`21.4%`) but only rank in seventh place in the preferences of African Kagglers (`17.5%`).

### The Status of Machine Learning <a id="ml-status"></a>
***

<div style="text-align:justify"> 
As the title of this notebook suggests, this is a study of the state of ML in the African continent. So, it is only natural that we turn our attention to the ML algorithms, software and hardware that are used by ML scientists and engineers on a daily basis. We begin by an investigation of general purpose machine/deep learning algorithms and consider also some more specialized ones that are prominent in the Computer Vision and Natural Language Processing domains. </div>

#### Algorithms for ML, CV, NLP (Popularity) <a id="algos"></a>

> **Question 24**: Which of the following ML algorithms do you use on a regular basis? (Select all that apply)

In [None]:
# Question 24 is select-all-that-apply: answer option k is stored in the
# column 'Q24_Part_k'. Only parts 1 through 10 are used here.
algos_cols = ['Q24_Part_{}'.format(part) for part in range(1, 11)]

# Abbreviated algorithm names, listed in the same order as the columns above
# (presumably matched positionally by the plotting helper).
algos_names = [
    'Regression',
    'DT/RF',
    'GBM',
    'Bayesian',
    'Evolutionary',
    'Dense NN',
    'CNN',
    'GAN',
    'RNN',
    'Transformers',
]

parameters = dict(
    df=ai_leaders_africa,
    strat_col='ai_leader',
    df_cols=algos_cols,
    x_names=algos_names,
    title=None,
    xaxis_title=None,
    legend_x=0.35,
)

fig = create_polar_multipart(**parameters)
fig.show()

> **Question 26**: Which categories of computer vision methods do you use on a regular basis?  (Select all that apply)

In [None]:
# Question 26 is select-all-that-apply: answer option k is stored in the
# column 'Q26_Part_k'. Only parts 1 through 5 are used here.
cv_cols = ['Q26_Part_{}'.format(part) for part in range(1, 6)]

# Display names, listed in the same order as the columns above
# (presumably matched positionally by the plotting helper).
cv_names = [
    'General Purpose',
    'Image Segmentation',
    'Object Detection',
    'Image Classification',
    'Generative Networks',
]

# NOTE(review): this call passes legend_y=0.35, whereas the other
# create_polar_multipart calls in this notebook pass legend_x=0.35.
# Confirm whether the vertical offset is intentional for this chart.
parameters = dict(
    df=ai_leaders_africa,
    strat_col='ai_leader',
    df_cols=cv_cols,
    x_names=cv_names,
    title=None,
    xaxis_title=None,
    legend_y=0.35,
)

fig = create_polar_multipart(**parameters)
fig.show()

> **Question 27**: Which of the following natural language processing (NLP) methods do you use on a regular basis?  (Select all that apply)

In [None]:
# Question 27 is select-all-that-apply: answer option k is stored in the
# column 'Q27_Part_k'. Only parts 1 through 4 are used here.
nlp_cols = ['Q27_Part_{}'.format(part) for part in range(1, 5)]

# Display names, listed in the same order as the columns above
# (presumably matched positionally by the plotting helper).
nlp_names = [
    'Word embeddings',
    'Encoder-decoders',
    'Contextualized embeddings',
    'Transformer LM',
]

parameters = dict(
    df=ai_leaders_africa,
    strat_col='ai_leader',
    df_cols=nlp_cols,
    x_names=nlp_names,
    title=None,
    xaxis_title=None,
    legend_x=0.35,
)

fig = create_polar_multipart(**parameters)
fig.show()

The above bar charts show the popularity of each ML (respectively, computer vision, natural language processing) algorithm among two distinct groups of respondents: those that live/work in a country that is an AI Leader and those that live/work in an African country. For the `regression` algorithm, for example, we observe that it is used by about half (`52%`) of the AI Leader respondents and by about `45%` of African respondents. There are some important observations and take-aways to note here:
* Every technique/algorithm is used more by the group of AI Leaders, however the differences in popularity are not extreme, and only in the case of `gradient boosting machines` do they surpass `10%`.
* The greatest differences in popularity are observed in more traditional ML algorithms such as `regression`, `random forests` and `gradient boosting machines`. In more modern techniques (i.e. various deep learning algorithms) there seems to be a much smaller gap. One potential explanation might be that the resurgence of the study of artificial neural networks and deep learning can be placed at 2012 (with the success of **Krizhevsky et al. (2012)** in using GPUs to train convolutional neural networks that won the ImageNet competition) and coincides with the beginning of the first efforts to broaden participation in ML. This coincidence might have led to an increased interest for deep learning techniques as opposed to more traditional methods.
* Considering ML algorithms that require the use of at least a GPU to train (e.g. `RNNs`, `Transformers`, `CNNs`) we note that African countries do not lag far behind their AI Leader counterparts. In the domain of NLP, for instance, popularity differences across all deep-learning algorithms are at no more than `2.5%`. The corresponding figure for computer vision is at around `4.5%`. This indicates that, at least in the given set of African respondents, hardware considerations have not been a significant obstacle in their ML development.

#### Software: ML Frameworks (Popularity) <a id="ml-framework"></a>

> **Question 28**: Which of the following machine learning frameworks do you use on a regular basis? (Select all that apply)

In [None]:
# Question 28 is select-all-that-apply: answer option k is stored in the
# column 'Q28_Part_k'. Only parts 1 through 10 are used here.
ml_frameworks_cols = ['Q28_Part_{}'.format(part) for part in range(1, 11)]

# Framework display names, listed in the same order as the columns above
# (presumably matched positionally by the plotting helper).
ml_frameworks_names = [
    'Scikit-learn',
    'Tensorflow',
    'Keras',
    'RandomForest',
    'Xgboost',
    'PyTorch',
    'Caret',
    'LightGBM',
    'Spark MLib',
    'Fast.ai',
]

parameters = dict(
    df=ai_leaders_africa,
    strat_col='ai_leader',
    df_cols=ml_frameworks_cols,
    x_names=ml_frameworks_names,
    title=None,
    xaxis_title=None,
    legend_x=0.35,
)

fig = create_polar_multipart(**parameters)
fig.show()

We observe, perhaps unsurprisingly, that across the board the various ML frameworks are more popular among AI Leaders. A few points worth noting:

* The largest differences in popularity are observed in frameworks that are designed for non-deep learning, such as `Scikit-learn` (difference: `~8%`), `RandomForest` (difference: `~7%`) and `Xgboost` (difference: `~8%`). This observation is in line with our previous observation on popular ML algorithms. We noted in particular that more traditional learning methods are less popular in African countries, so unsurprisingly, the frameworks to implement said methods will be correspondingly less popular.

* Focusing on the main deep learning frameworks, we note that `Tensorflow` and `Keras` (although with the introduction of `TensorFlow 2.0` the line between `Tensorflow` and `Keras` is not well-defined) are dominant compared to `PyTorch`, in both AI Leaders and African countries. This seems to follow the pattern that we observe for deep learning framework popularity in industry rather than in research, where `PyTorch` took the lead from `Tensorflow` overwhelmingly (see relevant [article](https://thegradient.pub/state-of-ml-frameworks-2019-pytorch-dominates-research-tensorflow-dominates-industry/) for further details). This, of course, might be due to inertia, since `TensorFlow` has been around for more years than `PyTorch`. We should expect next year's survey results to gauge if the popularity has shifted in the same manner as that for research papers.

#### Specialized Hardware (Popularity) <a id="hardware"></a>

> **Question 21**: Which types of specialized hardware do you use on a regular basis?  (Select all that apply)

In [None]:
# Question 21 is select-all-that-apply: answer option k is stored in the
# column 'Q21_Part_k'. Only parts 1 through 3 are used here.
hardware_cols = ['Q21_Part_{}'.format(part) for part in range(1, 4)]

# Display names, listed in the same order as the columns above
# (presumably matched positionally by the plotting helper).
hardware_names = [
    'CPUs',
    'GPUs',
    'TPUs',
]

parameters = dict(
    df=ai_leaders_africa,
    strat_col='ai_leader',
    df_cols=hardware_cols,
    x_names=hardware_names,
    title=None,
    xaxis_title=None,
    legend_x=0.35,
)

fig = create_polar_multipart(**parameters)
fig.show()

#### TPU Usage Frequency (Distribution)

> **Question 22**: Have you ever used a TPU (tensor processing unit)?

In [None]:
# Bar chart for Question 22 (TPU usage), using a single frame stratified by
# 'ai_leader'. Q22_name_map is inverted so the plotting helper receives a
# value -> label mapping instead of label -> value.
parameters = dict(
    dfs=[ai_leaders_africa],
    strat_cols=['ai_leader'],
    quant_col='Q22',
    title=None,
    xaxis_titles=[None],
    yaxis_titles=['Percentage of respondents'],
    yaxis=[0, 1.02],
    names_map={code: label for label, code in Q22_name_map.items()},
)

fig = create_plotly_barchart(**parameters)
fig.show()

<div style="text-align:justify"> 
    
Last but not least, we take a look at the use of ML-relevant hardware. We observe, unsurprisingly, that CPU popularity/usage is about the same across both AI Leaders and African countries (difference of `~0.5%` in favour of AI Leaders). It is not clear from `Question 21` whether it is referring to specialized CPU chipsets or whether it includes CPUs of typical laptop computers or ML workstations. We hypothesize that the respondents interpreted it to be the latter, which led to such a small difference in popularity.

With GPUs on the other hand, we observe a considerable difference of `~11%` in popularity in favour of AI Leaders. This is expected, as GPU cards on the one hand and GPU-containing instances in cloud computing services on the other are quite expensive, so we expect higher popularity in more developed countries.

What is indeed very startling is the higher TPU popularity in African countries. TPUs are specialized pieces of hardware that accelerate ML computations and are currently only available through Google Cloud. TPU instances tend to be much more expensive than GPU instances so we would expect a large difference in favour of AI Leaders (much like the one we observed for GPUs), however we observe a `~0.2%` higher popularity in African countries. What makes things even more peculiar is that if we observe the frequency distribution for TPU usage, we observe that for all frequency groups (2-5 times, 6-24 times etc.) the AI Leaders are slightly higher (although the differences are very small).

</div>

#### Machine Learning Expenditure (Distribution) <a id="ml-expenditure"></a>

> **Question 11**: Approximately how much money have you spent on machine learning and/or cloud computing products at your work in the past 5 years?

In [None]:
# Numeric expenditure code -> human-readable bin label for Question 11,
# written out by hand rather than derived from another mapping.
invert_Q11_name_map = {
    0: 'USD 0',
    1: 'USD 1-99',
    100: 'USD 100-999',
    1000: 'USD 1,000-9,999',
    10000: 'USD 10,000-99,999',
    100000: 'USD >100,000',
}

# Bar charts for Question 11 (ML / cloud spending). Two frames are supplied,
# each listed alongside its stratification column (presumably paired
# positionally by create_plotly_barchart -- confirm there).
parameters = dict(
    dfs=[ai_leaders_africa, african_countries],
    strat_cols=['ai_leader', 'Q3'],
    quant_col='Q11',
    title=None,
    xaxis_titles=[None, None],
    yaxis_titles=['Percentage of respondents', None],
    rows=1,
    cols=2,
    horizontal_spacing=0.02,
    yaxis=[0, 1.02],
    names_map=invert_Q11_name_map,
    legend_y=-0.15,
)

fig = create_plotly_barchart(**parameters)
fig.show()

Last but not least, we turn our attention to the practical matter of ML expenditure. Distribution of ML expenditure reveals stark differences between African countries and the AI Leaders and could be seen as the only sector where the countries in review could be considered as "lagging behind" (more likely due to reasons related to income inequality on a cross group level). More specifically:
* `35.9%` of African Kagglers report no lifetime ML expenditures whatsoever, compared to `29.6%` of Kagglers from AI Leaders.
* `42.3%` of African Kagglers report ML expenditures up to USD 999 (i.e., either in the "USD 1-99" or "USD 100-999" bins) and `21.9%` report ML expenditures above USD 1,000. The same percentages for Kagglers from the AI Leaders are `31.3%` and `39.1%` respectively.
*  The differences become starker, the higher the expenditure level. Only `4.5%` of the African Kagglers report ML expenditures in the USD 10,000 to USD 99,999 range (v. `11.3%` of Kagglers from AI Leaders) and `2.5%` reported expenditures above USD 100,000 (v. `11.1%` of Kagglers from AI Leaders).

In addition, the key takeaways at cross-country level can be summarized as follows:
* Algeria seems to be the country with the fewest ML expenditures, as the largest proportion of its population (`55.9%`) reported no expenditures whatsoever while no respondents reported expenditures of more than USD 9,999.
* South Africa is located at the opposite end of the spectrum (and could be placed closer to the overall trend we observe for AI Leaders). It is the country with the lowest proportion of no ML expenditures (`28%`) and has the largest proportion of Kagglers reporting expenditures in the "USD 10,000 – 99,999" (`9.8%`) and "USD >100,000" (`8.5%`) bins.
* The remaining countries follow, with some variation, the overall trend for African countries. No expenditures above USD 99,999 are reported in Morocco and Tunisia. 

## Limitations <a id="limitations"></a>

Below we outline some of the limitations of our analysis above:
*	We have based our analysis of the ML status in Africa on the responses of Kagglers participating in the 2019 DS & ML Survey. However, such respondents are likely to be among the "early adopters" of ML and AI tools and, as a result, may not be representative with respect to the general population in Africa (e.g. the average technology user). Nevertheless, we do believe that studying the behavior of early adopters is crucial in understanding what drives adoption and influences how fast technology spreads.
*	As discussed in our methodology section, our analysis is based on African countries that are very diverse both in terms of geography as well as socio-economic level. Most are countries either in the northern-most or southern-most regions of the continent. With only seven countries (which are also quite diverse) from a continent of 54 countries, we cannot hope to have robust generalizations. We do, however, expect that countries that are similar to those analyzed in this notebook will probably exhibit similar patterns with respect to ML.
*   We have not considered any of the responses whose country column has the value `Other` (recall that if a given country had fewer than 50 respondents it was converted to `Other` for anonymity). Presumably, some of the respondents in that subset of the data might be coming from African countries but we did not attempt to identify them using some clustering technique.
*	For simplicity of analysis, we have not considered textual responses (i.e. those that the users did not select from a predetermined list but instead typed themselves) or responses such as "Other" or "None". In so doing, we acknowledge that we may have potentially missed out on valuable information.

## Conclusions and Recommendations <a id="conclusions"></a>

<div style="text-align:justify"> Our exploration of the 2019 DS & ML datasets reveals a relatively optimistic picture of the status of ML in African countries, at least to the extent that African Kagglers are seen as a representative sample of coders in the continent. The average African Kaggler is young and enthusiastic about ML and AI. Despite the fact that most of the respondents have few years of general coding or machine learning experience and that “Africa has only recently started to catch up” with state-of-the-art developments in ML, it is catching up fast! There are no "significant gaps" that need to be bridged in respect of using particular ML frameworks, software or hardware or any hints that would suggest that we need to promote particular educational platforms or DS media. </div>



![](https://robohub.org/wp-content/uploads/2017/08/bigstock-169283726.jpg)

However, we have indeed identified a few notable factors that differentiate Africa from other countries leading the AI revolution:

* First, as hinted above, the average African Kaggler tends to be younger and more inexperienced compared to the average Kaggler from an AI Leader. We suspect that this trend can be primarily attributed to the overall demographic trends in the continent. 
* Second, average African Kagglers report fewer ML-related expenditures. The reasons here are more likely attributable to income inequality and other economic phenomena across African countries and the AI Leaders.
* Third, while African Kagglers are on par with Kagglers from the AI Leaders with respect to the adoption of complicated techniques in NLP and computer vision, they seem to somewhat lag behind in the learning of “traditional” ML algorithms (e.g., regressions). This seems to suggest that African data scientists are fast to specialize or prefer learning “on the job” (as suggested by the somewhat low popularity of University courses). While this is not, in itself, a non-desirable outcome, we are of the view that firm adoption of ML throughout the continent can be more successfully pursued, if a larger part of the population is exposed to “the basics” of the discipline. 

Given these above insights, we have come up with the following (broad) policy recommendations which, if adopted, could enhance the status of ML in Africa and lead to its “democratization” in individual African countries:

* **Adapt educational curricula to incorporate basic coding and ML algorithms**: As already discussed, education is crucial in increasing adoption rates of ML. One of the key reasons why initiatives such as Africa Coding Week are very successful in this respect is that they include school-aged children and development in their initiatives, while they aim to equip them with basic tools that are likely to stimulate further interest in the discipline. Local governments could adopt similar policies by incorporating computer science early on in the school curriculum and focusing (where age appropriate) on basic data science tools, statistics and regressions. Such efforts could increase interest in university-level ML courses, encourage more academic research and, ultimately, bring Africa to the intellectual forefront of the AI and ML community.
* **Continue to encourage investment by global technology firms**: African regulators should prioritize the creation of an investment-friendly climate in order to attract more technology giants the likes of Google and IBM (e.g., through fiscal incentives or simplification of administrative procedures for companies in the space). A scenario where different governments would engage in such a “race to the top” could lead up to policies with limited fiscal cost but great impact in terms of ML adoption. More technology companies in Africa could mean that any economic imbalance hindering ML innovation could be partly addressed by the market mechanism. After all, global technology employers are more likely to sponsor employees to attend University, in order to pick up hard skills, implement corporate-wide training programs, invest in ML expenditure or, in some cases, encourage their employees to pursue ML-related research.
* **Subsidize smaller and local players that focus on ML investments**. Even though governments would be advised to prioritize policies intended to attract “BigTech” players, the importance of small, local entrepreneurs in spreading ML adoption should not be underestimated. Local entrepreneurship is crucial to the extent that it is more knowledgeable of challenges, opportunities and risks in a given country while it is often able to flexibly source creativity and talent. African country governments could thus examine incentives such as the provision of subsidies to local start-ups in the ML space or other incentives intended to facilitate their operations. Although such policies would arguably trigger larger fiscal costs, their implementation could lead to outcomes that indeed deliver on the promise to “tangibly improve the lives of millions” through AI and ML through the design of tailored and innovative applications.


## References <a id="references"></a>

* Davide Strusani and Georges Vivien Houngbonon. 2019. _The role of Artificial Intelligence in Supporting Development in Emerging Markets_. International Finance Corporation. [URL](https://www.ifc.org/wps/wcm/connect/32e54505-3bfb-4198-b939-e1e8847715f1/EMCompass-Note-69-Role-of-AI-in-EMs.pdf?MOD=AJPERES&CVID=mNdPiNf)
* Antoinette M Sayeh. 2013. _Africa: Second Fastest-Growing Region in the World_. IMFBlog. [URL](https://blogs.imf.org/2013/06/10/africa-second-fastest-growing-region-in-the-world/)
* Jeremy Kahn. 2019. _Africa Is Tech’s Next Great Frontier, Github A.I. Expert Says_. Fortune. [URL](https://fortune.com/2019/07/17/africa-is-techs-next-great-frontier-github-ai/)
* Karen Hao. 2019. _The future of AI research is in Africa_. MIT Technology Review. [URL](https://www.technologyreview.com/s/613848/ai-africa-machine-learning-ibm-google/) 
* Moustapha Cisse. 2018. _Look to Africa to advance artificial intelligence_. Nature. [URL](https://www.nature.com/articles/d41586-018-07104-7)
* Entrepreneur South Africa Staff. 2019. _Artificial Intelligence Is Filling The Gaps In Developing Africa_. Entrepreneur South Africa. [URL](https://www.entrepreneur.com/article/337223)
* Sameer Markey. 2018. _Here's Why We Need To Democratize Artificial Intelligence_. Forbes. [URL](https://www.forbes.com/sites/forbestechcouncil/2018/04/16/heres-why-we-need-to-democratize-artificial-intelligence/)
* Arne Holst. 2018. _Share of global artificial intelligence (AI) investment and financing by country from 2013 to 1Q'18_. Statista. [URL](https://www.statista.com/statistics/941446/ai-investment-and-funding-share-by-country/)
* Renae Reints. 2018. _Artificial Intelligence Jobs Are on the Rise. Which Countries Are Hiring Right Now_. Fortune. [URL](https://fortune.com/2018/11/29/ai-jobs-worldwide/)
* Scimago Lab. _Scimago Journal and Country Rank_. Retrieved 29 Nov. 2019. Scimago Lab. [URL](https://www.scimagojr.com/countryrank.php?category=1702&year=2018) 
* Daniel Araya. 2019. _Who will lead in the age of Artificial Intelligence_. Forbes. [URL](https://www.forbes.com/sites/danielaraya/2019/01/01/who-will-lead-in-the-age-of-artificial-intelligence/#450cc0e46f95)
* Jeff Louks, Susanne Hupfer, David Jarvis and Timothy Murphy. 2019. _Future in the balance? How countries are pursuing an AI advantage_. Deloitte Insights. [URL](https://www2.deloitte.com/us/en/insights/focus/cognitive-technologies/ai-investment-by-country.html)