# **Visualisation of US Politicians data**

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and print the full path of every file found.
for folder, _, file_names in os.walk('/kaggle/input'):
    for file_name in file_names:
        full_path = os.path.join(folder, file_name)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

## **Loading the dataset**

In [None]:
# Read the politicians dataset from the Kaggle input directory.
dataset_path = "/kaggle/input/us-politicians-twitter-dataset/dataset.csv"
df = pd.read_csv(dataset_path)

# Print (rows, columns), then preview the first rows as the cell output.
print(df.shape)
df.head()

In [None]:
# Remove columns that the visible plotting cells never reference, drop exact
# duplicate rows, and renumber the index from 0.
#
# `df` is rebound instead of mutated with inplace=True, and errors='ignore'
# skips columns that are already gone, so re-running this cell no longer
# raises KeyError on the second execution.
unused_columns = ['Twitter_username', 'Account_start_time', 'Account_ID', 'Birthday']
df = (
    df.drop(columns=unused_columns, errors='ignore')
      .drop_duplicates()
      .reset_index(drop=True)
)

### **Number of Politicians of each Gender in top 10 major political parties**

In [None]:
# Keep one row per (Name, Political_party) pair before counting.
df_temp = df.drop_duplicates(subset=['Name', 'Political_party'])
# The ten most frequent parties, most frequent first (value_counts order).
top_parties = df_temp['Political_party'].value_counts().index[:10]

# Apply the theme before the figure is created, as the Instagram cell does.
sns.set(style='darkgrid')
plt.figure(figsize=(14,8))
sns.countplot(y='Political_party', hue='Sex', data=df_temp, order=top_parties, palette="Set2")
# Log-scaled count axis (presumably because party sizes differ widely).
plt.xscale('log')
plt.xlabel("Number of Politicians", weight='bold')
plt.ylabel("Political Party", weight='bold')
# A single legend call: a second plt.legend() replaces the legend built by the
# first one, which previously discarded loc='lower right'.
plt.legend(title='Gender', loc='lower right', shadow=True, facecolor='lightyellow')
plt.show()

### **Age distribution of Politicians of each Gender in top 10 major political parties**

In [None]:
# Keep one row per (Name, Political_party) pair.
df_temp = df.drop_duplicates(subset=['Name', 'Political_party'])
# The ten most frequent parties, most frequent first.
top_parties = df_temp['Political_party'].value_counts().index[:10]

plt.figure(figsize=(16,10))
sns.set(style='darkgrid')
# One box per party, split by Sex.
ax = sns.boxplot(
    data=df_temp,
    x='Political_party',
    y='Age',
    hue="Sex",
    order=top_parties,
    palette="Set2",
)
plt.xticks(rotation=90)  # vertical tick labels on the party axis
ax.set_xlabel("Political Party", weight='bold')
ax.set_ylabel("Age", weight='bold')
ax.legend(title='Gender', shadow=True, facecolor='lightyellow')
plt.show()

### **Age Distribution of Politicians in top 10 major countries (birthplace)**

In [None]:
# Keep one row per politician (unique Name).
df_temp = df.drop_duplicates(subset=['Name'])
# The ten most frequent birthplaces, most frequent first. The slice was
# previously [:15], which contradicted the "top 10" section heading.
top_birthplaces = df_temp['Birthplace'].value_counts().index[:10]

plt.figure(figsize=(16,10))
sns.set(style='darkgrid')
# One age box per birthplace.
sns.boxplot(y='Age', x='Birthplace', data=df_temp, order=top_birthplaces, palette="Set2")
plt.xticks(rotation=90)  # vertical tick labels on the birthplace axis
plt.xlabel("Birth Place", weight='bold')
plt.ylabel("Age", weight='bold')
plt.show()

### **Total number of Politicians vs. the number of Politicians using Instagram in the top 15 major political parties**

In [None]:
# Per party: number of distinct names and distinct Instagram usernames.
# nunique() skips missing values by default, so rows with a missing
# Instagram_username add nothing to that count.
party_counts = (
    df.groupby('Political_party')[['Name', 'Instagram_username']]
      .nunique()
      .sort_values(by='Name', ascending=False)
      .head(15)
)
# Turn Political_party back into a regular column for plotting.
df_temp = party_counts.reset_index()

sns.set(style='darkgrid')
plt.figure(figsize=(12,8))
# Two bar series drawn on the same axes: totals first, Instagram users on top.
ax = sns.barplot(
    data=df_temp, x='Political_party', y='Name',
    color='b', label='Total Politicians',
)
sns.barplot(
    data=df_temp, x='Political_party', y='Instagram_username',
    color='g', label='Politicians using Instagram', ax=ax,
)
ax.set_yscale('log')
plt.xticks(rotation=90)
ax.set_xlabel('Political Party', weight='bold')
ax.set_ylabel('Number of Politicians', weight='bold')
ax.legend(title='Politicians', shadow=True, facecolor='lightyellow')
plt.show()

### **Share of foreign-born vs. US-born politicians**

In [None]:
from collections import Counter
# Keep one row per politician (unique Name).
df_temp = df.drop_duplicates(subset=['Name'])

# Only rows with a known Birthplace are classified. The previous
# `~(Birthplace == 'United States of America')` test was also True for missing
# values, so politicians with no recorded birthplace were counted as
# foreign-born.
known_birthplace = df_temp['Birthplace'].dropna()
is_us_born = known_birthplace == 'United States of America'
dict_ = {
    'US-born': int(is_us_born.sum()),
    'Foreign-born': int((~is_us_born).sum()),
}

plt.figure(figsize=(6,6))
# explode offsets the first wedge ('US-born') from the centre.
plt.pie(x=list(dict_.values()), labels=list(dict_.keys()), autopct='%1.1f%%', shadow=True,
        startangle=0, explode=[0.1, 0])
plt.show()

### **Birthplace shares of foreign-born politicians (top 10 countries)**

In [None]:
from collections import Counter
# One row per politician, keeping only rows whose Birthplace is not the US.
df_temp = df.drop_duplicates(subset=['Name'])
not_us_born = df_temp['Birthplace'] != 'United States of America'
df_temp = df_temp[not_us_born]

# value_counts() is already sorted by descending count, so the first ten
# entries are the ten most common birthplaces.
dict_ = df_temp['Birthplace'].value_counts().head(10).to_dict()

plt.figure(figsize=(8,8))
plt.pie(
    x=dict_.values(),
    labels=dict_.keys(),
    autopct='%1.1f%%',
    shadow=True,
    startangle=90,
)
plt.show()

### **Number of Politicians of each Gender from the top 10 foreign countries of birth**

In [None]:
# One row per politician, keeping only rows whose Birthplace is not the US.
df_temp = df.drop_duplicates(subset=['Name'])
not_us_born = df_temp['Birthplace'] != 'United States of America'
df_temp = df_temp[not_us_born]
# The ten most frequent remaining birthplaces, most frequent first.
top_birthplaces = df_temp['Birthplace'].value_counts().index[:10]

plt.figure(figsize=(14,8))
sns.set(style='darkgrid')
# Count of politicians per birthplace, split by Sex.
ax = sns.countplot(
    data=df_temp,
    x='Birthplace',
    hue='Sex',
    order=top_birthplaces,
    palette="Set2",
)
ax.set_xlabel('Birthplace', weight='bold')
ax.set_ylabel('Number of Politicians', weight='bold')
ax.legend(title='Gender', shadow=True, facecolor='lightyellow')
plt.show()

**Feel free to Upvote and provide Feedback**