African Americans (also known as Black Americans and Afro-Americans) are an ethnic group in the United States. The first achievements by African Americans in various fields have historically marked footholds, often leading to more widespread cultural change. The shorthand phrase for this is "breaking the color barrier".
https://en.wikipedia.org/wiki/List_of_African-American_firsts

![](https://english.colostate.edu/wp-content/uploads/sites/56/2018/02/blackhistorymonth.jpg)english.colostate.edu

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of each file in it.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        full_path = os.path.join(dirname, filename)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
!pip install twython

In [None]:
#Another way to install Twython
#easy_install twython

#Codes by Gabriel Preda https://www.kaggle.com/gpreda/vaccine-myths

In [None]:
from wordcloud import WordCloud, STOPWORDS
from nltk.sentiment import SentimentIntensityAnalyzer
from textblob import TextBlob
import warnings
warnings.simplefilter("ignore")

In [None]:
# Load science.csv into `df`, lift the displayed-column limit, and preview the last rows.
df = pd.read_csv(
    '../input/african-american-achievements/science.csv',
    encoding='utf8',
)
pd.set_option('display.max_columns', None)
df.tail()

In [None]:
# Load firsts.csv into `df1`, lift the displayed-column limit, and preview the last rows.
df1 = pd.read_csv(
    '../input/african-american-achievements/firsts.csv',
    encoding='utf8',
)
pd.set_option('display.max_columns', None)
df1.tail()

In [None]:
# Number of missing values in each column of `df`.
df.isna().sum()

In [None]:
def show_wordcloud(df, title=""):
    """Draw a word cloud from the non-null text entries of *df*.

    Parameters
    ----------
    df : iterable of str with a ``dropna`` method
        The text entries; the callers in this notebook pass a single
        DataFrame column. Null entries are dropped before joining.
    title : str, optional
        Figure title shown above the cloud.
    """
    corpus = " ".join(df.dropna())

    # WordCloud's built-in stop words plus a few extra common English words.
    extra_stopwords = ["the", "for", "as", "a", "that", "to", "of", "at", "an"]
    stopwords = set(STOPWORDS) | set(extra_stopwords)

    cloud = WordCloud(
        stopwords=stopwords,
        scale=4,
        max_font_size=50,
        max_words=500,
        background_color="black",
        colormap='Set2',
    ).generate(corpus)

    figure = plt.figure(1, figsize=(16, 16))
    plt.axis('off')
    figure.suptitle(title, fontsize=20)
    figure.subplots_adjust(top=2.3)
    plt.imshow(cloud, interpolation='bilinear')
    plt.show()

In [None]:
# Word cloud of the text in the inventions_accomplishments column.
show_wordcloud(
    df['inventions_accomplishments'],
    title='Afro-Americans Inventions Accomplishments',
)

In [None]:
# Word cloud of the text in the occupation_s column.
show_wordcloud(
    df['occupation_s'],
    title='Afro Americans Occupations',
)

In [None]:
# borrowed from https://www.kaggle.com/pashupatigupta/sentiments-transformer-vader-embedding-bert
# Module-level analyzer instance, created once; find_sentiment reads its "compound" score.
sia = SentimentIntensityAnalyzer()
def find_sentiment(post, analyzer=None):
    """Label *post* as "Positive", "Negative" or "Neutral".

    The label is the sign of the ``"compound"`` entry in the dict returned
    by ``analyzer.polarity_scores(post)``.

    Parameters
    ----------
    post : str
        Text to classify.
    analyzer : object, optional
        Any object exposing ``polarity_scores(text) -> dict`` with a
        ``"compound"`` key. Defaults to the module-level ``sia`` instance.

    Returns
    -------
    str
        "Positive" if the compound score is > 0, "Negative" if it is < 0,
        otherwise "Neutral".
    """
    if analyzer is None:
        analyzer = sia
    # Score the text once; the original re-ran the analyzer for every
    # non-positive text just to test the second condition.
    compound = analyzer.polarity_scores(post)["compound"]
    if compound > 0:
        return "Positive"
    if compound < 0:
        return "Negative"
    return "Neutral"

In [None]:
def plot_sentiment(df, feature, title):
    """Plot the distribution of sentiment labels as two side-by-side bar charts.

    The left chart shows the raw count of each distinct value in
    ``df[feature]``; the right chart shows each value's share of the total
    as a percentage (0-100).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the label column.
    feature : str
        Name of the column whose values are counted.
    title : str
        Text inserted into the axis labels and the figure title.
    """
    counts = df[feature].value_counts()
    # Scale to 0-100 so the plotted values match the "Percentage" axis label
    # (the unscaled ratio would be a 0-1 fraction).
    percent = counts / counts.sum() * 100

    fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=(12, 5))

    counts.plot(kind='bar', ax=ax1, color='green')
    percent.plot(kind='bar', ax=ax2, color='blue')
    ax1.set_ylabel(f'Counts : {title} sentiments', size=12)
    ax2.set_ylabel(f'Percentage : {title} sentiments', size=12)
    plt.suptitle(f"Sentiment analysis: {title}")
    plt.tight_layout()
    plt.show()

#By definition, accomplishments won't have negative sentiment, except for those who are envious or resentful of others.

Plus, I want to keep this script for another (NLP) dataset.

In [None]:
# Keep only rows that have an occupation. NOTE: `df` itself is reassigned, so
# the dropped rows are gone for every later use of `df`.
# .copy() makes the filtered frame independent, so adding a column below is
# not a chained assignment on a slice (SettingWithCopyWarning).
df = df.loc[~df.occupation_s.isna()].copy()
df['occupation_s_sentiment'] = df['occupation_s'].apply(find_sentiment)
plot_sentiment(df, 'occupation_s_sentiment', 'occupation_s')

#For those who think that occupations don't have negative sentiments: trust me, they do. I hated one of my jobs and it was a relief to get rid of it. And most people don't like their jobs either.

In [None]:
# Keep only rows that have an accomplishment description. NOTE: `df` itself
# is reassigned, so the dropped rows are gone for every later use of `df`.
# .copy() makes the filtered frame independent, so adding a column below is
# not a chained assignment on a slice (SettingWithCopyWarning).
df = df.loc[~df.inventions_accomplishments.isna()].copy()
df['inventions_accomplishments_sentiment'] = df['inventions_accomplishments'].apply(find_sentiment)
plot_sentiment(df, 'inventions_accomplishments_sentiment', 'inventions_accomplishments')

In [None]:
def find_sentiment_polarity_textblob(post):
    """Return the summed TextBlob polarity of every sentence in *post*.

    The per-sentence scores are added together, not averaged. If TextBlob
    yields no sentences, the result is 0.
    """
    total_polarity = 0
    for sent in TextBlob(post).sentences:
        total_polarity = total_polarity + sent.sentiment.polarity
    return total_polarity

def find_sentiment_subjectivity_textblob(post):
    """Return the summed TextBlob subjectivity of every sentence in *post*.

    The per-sentence scores are added together, not averaged. If TextBlob
    yields no sentences, the result is 0.
    """
    total_subjectivity = 0
    for sent in TextBlob(post).sentences:
        total_subjectivity = total_subjectivity + sent.sentiment.subjectivity
    return total_subjectivity

In [None]:
# Add TextBlob polarity and subjectivity columns for the accomplishments text.
df['inventions_accomplishments_sentiment_polarity'] = (
    df['inventions_accomplishments'].apply(find_sentiment_polarity_textblob)
)
df['inventions_accomplishments_sentiment_subjectivity'] = (
    df['inventions_accomplishments'].apply(find_sentiment_subjectivity_textblob)
)

In [None]:
def plot_sentiment_textblob(df, feature, inventions_accomplishments ):
    """Plot KDE curves of the polarity and subjectivity columns of *feature*.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``<feature>_sentiment_polarity`` and
        ``<feature>_sentiment_subjectivity``.
    feature : str
        Prefix shared by the two columns to plot.
    inventions_accomplishments : str
        Despite its name, this is only the display label inserted into the
        axis labels and the figure title.
    """
    # Read both columns before creating the figure, so a missing column
    # fails before anything is drawn.
    polarity_scores = df[feature + '_sentiment_polarity']
    subjectivity_scores = df[feature + '_sentiment_subjectivity']

    _, (polarity_ax, subjectivity_ax) = plt.subplots(ncols=2, figsize=(12, 5))

    polarity_scores.plot(kind='kde', ax=polarity_ax, color='magenta')
    subjectivity_scores.plot(kind='kde', ax=subjectivity_ax, color='green')

    polarity_ax.set_ylabel(f'Sentiment polarity : {inventions_accomplishments}', size=12)
    subjectivity_ax.set_ylabel(f'Sentiment subjectivity: {inventions_accomplishments}', size=12)

    plt.suptitle(f"Sentiment analysis (polarity & subjectivity): {inventions_accomplishments}")
    plt.tight_layout()
    plt.show()

In [None]:
# KDE plots of the TextBlob polarity/subjectivity columns for inventions_accomplishments.
plot_sentiment_textblob(
    df,
    "inventions_accomplishments",
    'Inventions Accomplishments',
)

In [None]:
# Add TextBlob polarity and subjectivity columns for the occupation text.
df['occupation_s_sentiment_polarity'] = (
    df['occupation_s'].apply(find_sentiment_polarity_textblob)
)
df['occupation_s_sentiment_subjectivity'] = (
    df['occupation_s'].apply(find_sentiment_subjectivity_textblob)
)

In [None]:
# KDE plots of the TextBlob polarity/subjectivity columns for occupation_s.
plot_sentiment_textblob(
    df,
    "occupation_s",
    'Occupations',
)

In [None]:
#Code by Olga Belitskaya https://www.kaggle.com/olgabelitskaya/sequential-data/comments
from IPython.display import display,HTML
# Default colours, font families and font sizes for dhtml headings.
c1, c2 = '#eb3434', '#eb3446'
f1, f2 = 'Akronim', 'Smokum'
fs1, fs2 = 30, 15


def dhtml(string, fontcolor=c1, font=f1, fontsize=fs1):
    """Display *string* as a styled <h1> heading in the notebook output.

    The emitted HTML imports the requested family from Google Fonts with the
    ``3d-float`` effect and applies it, the colour and the pixel size inline.

    Parameters
    ----------
    string : str
        Heading text; inserted into the markup as-is, without HTML escaping.
    fontcolor : str
        CSS colour value for the text.
    font : str
        Google Fonts family name.
    fontsize : int
        Font size in pixels.
    """
    style_tag = (
        "<style>\n"
        "    @import 'https://fonts.googleapis.com/css?family="
        + font
        + "&effect=3d-float';</style>\n"
    )
    # `%` binds tighter than `+`, so only the final fragment is formatted.
    heading_tag = (
        "    <h1 class='font-effect-3d-float' style='font-family:"
        + font
        + "; color:"
        + fontcolor
        + "; font-size:"
        + str(fontsize)
        + "px;'>%s</h1>" % string
    )
    display(HTML(style_tag + heading_tag))


dhtml('Script borrowed from Gabriel Preda. Thanks Gabe.')