In [1]:
import pandas as pd
import plotly.express as px

### Import dataset

In [2]:
# Load the patient-level records into a DataFrame. Per the pandas docs,
# low_memory=False parses the file in one pass instead of in chunks, which
# avoids columns ending up with mixed inferred dtypes.
patients_csv_path = '../data/patients_data.csv'
df = pd.read_csv(patients_csv_path, low_memory=False)

### Dataset columns

In [3]:
# Print every column name on its own line to get an overview of the schema.
for column_name in df.columns.tolist():
    print(column_name)

patient_number
p_id
state_patient_number
date_announced
age_bracket
gender
detected_city
detected_district
detected_state
state_code
nationality
type_of_transmission
contracted_from_which_patient_suspected
status_change_date
current_status
estimated_onset_date
source1
source2
source3
notes
backup_notes


### Preview of the dataset

In [4]:
# Display the first five rows as a quick sanity check of the loaded data.
df.head(n=5)

Unnamed: 0,patient_number,p_id,state_patient_number,date_announced,age_bracket,gender,detected_city,detected_district,detected_state,state_code,...,type_of_transmission,contracted_from_which_patient_suspected,status_change_date,current_status,estimated_onset_date,source1,source2,source3,notes,backup_notes
0,1,P1,KL-TS-P1,30/01/2020,20.0,F,Thrissur,Thrissur,Kerala,KL,...,Imported,,14/02/2020,Recovered,,https://twitter.com/vijayanpinarayi/status/122...,https://weather.com/en-IN/india/news/news/2020...,,Travelled from Wuhan,Student from Wuhan
1,2,P2,KL-AL-P1,02/02/2020,,,Alappuzha,Alappuzha,Kerala,KL,...,Imported,,14/02/2020,Recovered,,https://www.indiatoday.in/india/story/kerala-r...,https://weather.com/en-IN/india/news/news/2020...,,Travelled from Wuhan,Student from Wuhan
2,3,P3,KL-KS-P1,03/02/2020,,,Kasaragod,Kasaragod,Kerala,KL,...,Imported,,14/02/2020,Recovered,,https://www.indiatoday.in/india/story/kerala-n...,https://twitter.com/ANI/status/122422148580539...,https://weather.com/en-IN/india/news/news/2020...,Travelled from Wuhan,Student from Wuhan
3,4,P4,DL-P1,02/03/2020,45.0,M,East Delhi (Mayur Vihar),East Delhi,Delhi,DL,...,Imported,,15/03/2020,Recovered,,https://www.indiatoday.in/india/story/not-a-ja...,https://economictimes.indiatimes.com/news/poli...,,"Travelled from Austria, Italy",Travel history to Italy and Austria
4,5,P5,TS-P1,02/03/2020,24.0,M,Hyderabad,Hyderabad,Telangana,TG,...,Imported,,02/03/2020,Recovered,,https://www.deccanherald.com/national/south/qu...,https://www.indiatoday.in/india/story/coronavi...,https://www.thehindu.com/news/national/coronav...,"Travelled from Dubai to Bangalore on 20th Feb,...","Travel history to Dubai, Singapore contact"


### Age

In [47]:
# The age column is read as text because some entries are ranges. The one
# known range, '28-35', is replaced by a single representative value so the
# whole column can be converted to numbers.
age = pd.to_numeric(df.age_bracket.dropna().replace('28-35', '29'))

# Converting age into categories.
# Every bracket is right-inclusive (lower < age <= upper), which is the
# default behaviour of pd.cut. The 35-45 bracket is listed explicitly:
# without it, patients aged 36-45 would fall through to 'above 75'.
age_bin_edges = [float('-inf'), 5, 12, 18, 25, 35, 45, 55, 65, 75, float('inf')]
age_bin_labels = ['0-5', '6-12', '12-18', '18-25', '25-35', '35-45',
                  '45-55', '55-65', '65-75', 'above 75']
age_categorical = (
    pd.cut(age, bins=age_bin_edges, labels=age_bin_labels)
    .astype(str)
    .tolist()
)

# Create a dataframe of infections per age bracket.
# to_frame(name=...) names the count column explicitly, so the result does
# not depend on what name value_counts() gives its output (this changed in
# pandas 2.0).
df_age_count = pd.Series(age_categorical).value_counts().to_frame(name='Count of Infections')
df_age_count['Age'] = df_age_count.index

# Visualization of infections with respect to age category
fig = px.bar(df_age_count, y='Count of Infections', x='Age', color='Age', height=400)
fig.show()

### Gender

In [6]:
# Patients with no recorded gender are left out of this breakdown.
gender = df.gender.dropna()

# Calculate gender count.
# The count column is named explicitly instead of being looked up under the
# source column name, which value_counts() no longer uses on pandas >= 2.0.
df_gender = gender.value_counts().to_frame(name='Count')

# Data manipulation: expose the gender codes as a column with readable labels
df_gender['Gender'] = df_gender.index
df_gender['Gender'] = df_gender['Gender'].replace({'M': 'Male', 'F': 'Female'})

# Visualization of infections with respect to gender.
# `labels` maps a column name to its displayed axis title, so it has to be
# keyed on the existing 'Count' column to have any effect.
fig = px.bar(df_gender, y='Count', x='Gender', color='Gender',
             labels={'Count': 'Count of Infections'}, height=400)
fig.show()

### State

In [7]:
# Rows without a detected state are left out of this breakdown.
states = df.detected_state.dropna()

# Count infections per state. The count column is named explicitly so the
# cell does not depend on the column name value_counts() produces, which
# differs between pandas versions.
df_states = states.value_counts().to_frame(name='Number of Infections')
df_states['State'] = df_states.index

# Visualization of infections per state
fig = px.bar(df_states, y='Number of Infections', x='State', color='State', height=800)
fig.show()

### Infected Cities

In [8]:
# Rows without a detected city are left out of this breakdown.
city = df.detected_city.dropna()

# Count infections per city. The count column is named explicitly so the
# cell does not depend on the column name value_counts() produces, which
# differs between pandas versions.
df_city = city.value_counts().to_frame(name='Number of Infections')

# Data manipulation
# Map each city name to a state so bars can be coloured by state. When the
# same city name occurs with several states, the last row in the file wins.
city_state_map = dict(zip(df.detected_city, df.detected_state))
df_city['city'] = df_city.index
df_city['State'] = df_city['city'].map(city_state_map)

# Data Selection: value_counts() sorts by count, so head(20) is the top 20
df_city_top_20 = df_city.head(20)

# Data Visualization
fig = px.bar(df_city_top_20, y='Number of Infections', x='city',
             color='State',
             height=800)
fig.show()

### Type of transmission

In [9]:
# Rows without a recorded transmission type are left out. The stray
# 'Imported ' spelling (trailing space) is normalised BEFORE counting so
# that both spellings are tallied as one category instead of two rows that
# merely share a label.
types = df.type_of_transmission.dropna().replace('Imported ', 'Imported')

# Count infections per transmission type. The count column is named
# explicitly so the cell does not depend on the column name value_counts()
# produces, which differs between pandas versions.
df_types = types.value_counts().to_frame(name='Number of Infections')
df_types['Types'] = df_types.index

# Visualization of infections per transmission type
fig = px.bar(df_types, y='Number of Infections', x='Types', color='Types',
             height=800)
fig.show()

### Top infectors

In [12]:
suspects = df.contracted_from_which_patient_suspected

# Data cleaning
# Some entries are not patient identifiers: a run of twelve consecutive
# tweet URLs and one free-text note. They are relabelled 'Unknown' BEFORE
# counting so they are tallied as a single category rather than as several
# rows sharing the same label.
non_patient_entries = [
    f'https://twitter.com/ArogyaAndhra/status/{status_id}'
    for status_id in range(1249575886665334784, 1249575886665334796)
]
non_patient_entries.append('Intern at AIIMS Rishikesh')
suspects_clean = suspects.replace(non_patient_entries, 'Unknown')

# Count how many patients name each suspect. Missing values are dropped by
# value_counts(), so patients with no recorded source are not counted. The
# count column is named explicitly so the cell does not depend on the column
# name value_counts() produces, which differs between pandas versions.
df_suspects = suspects_clean.value_counts().to_frame(name='Number of Infections')
df_suspects['Suspect'] = df_suspects.index

# Data manipulation
# The state attached to a suspect is the detected state of the last patient
# in the file who names that suspect. 'Unknown' has no entry in the raw
# column, so its state stays missing.
suspect_state_map = dict(zip(df.contracted_from_which_patient_suspected, df.detected_state))
df_suspects['State'] = df_suspects['Suspect'].map(suspect_state_map)

# value_counts() sorts by count, so head(10) is the ten most-named suspects
df_suspects_top_10 = df_suspects.head(10)

# Data visualization
fig = px.bar(df_suspects_top_10, y='Number of Infections', x='Suspect', color='State',
             height=800)
fig.show()

### Status

In [13]:
# Patients with no recorded status are counted under an explicit 'Unknown'
# category instead of being dropped.
status = df.current_status.fillna('Unknown')

# Count patients per status. The count column is named explicitly so the
# cell does not depend on the column name value_counts() produces, which
# differs between pandas versions.
df_status = status.value_counts().to_frame(name='Count')
df_status['Status'] = df_status.index

# Data visualization
fig = px.bar(df_status, y='Count', x='Status', color='Status',
             height=800)
fig.show()