# Covid 19 analysis considering diet and happiness

I'm trying to analyse the data available to us and draw some insights from it. My main aim is to explore the relationship between lifestyle factors such as diet and happiness and their impact on Covid-19 around the world.

In [None]:
import numpy as np 
import pandas as pd 
from matplotlib import pyplot as plt
import seaborn as sns

Let's import the datasets we need. This data will hopefully give us some insight into the variety of diets among different populations. We are using:
* Country wise data about total food supply
* World happiness report

In [None]:
# Load the food-supply table and preview its first ten rows.
food_supply_csv = '../input/covid19-healthy-diet-dataset/Food_Supply_Quantity_kg_Data.csv'
df = pd.read_csv(food_supply_csv)
df.head(10)

In [None]:
# List the column labels of the food-supply table.
df.columns

In [None]:
# Count the missing entries in every column.
df.isna().sum()

In [None]:
# Discard every row that has at least one missing value (in place), then
# confirm that no nulls remain.
df.dropna(axis=0, how="any", inplace=True)
df.isna().sum()

In [None]:
# Peek at the "Undernourished" column before it is converted to float further down.
df.loc[:, "Undernourished"]

In [None]:
# (rows, columns) of the table at this point in the notebook.
df.shape
# We only have data from 155 countries.

In [None]:
# Use the country name as the row index; the later join with the happiness
# report is keyed on this index.
# The guard makes the cell safe to re-run: after the first run "Country" is
# no longer a column, and an unguarded set_index would raise KeyError.
if "Country" in df.columns:
    df.set_index("Country", inplace=True)

In [None]:
# NOTE(review): disabled earlier attempt at loading the happiness data. The
# report is actually loaded further down, from
# '../input/world-happiness-report/2019.csv'; this cell can probably be removed.
# happiness_df = pd.read_csv('../input/world-happiness/2019.csv')
# happiness_df.head(5)

In [None]:
# Map the text marker '<2.5' to the number 2.5 so that the whole
# "Undernourished" column can be cast to float, then summarise the numeric columns.
undernourished_text = df["Undernourished"].replace('<2.5', '2.5')
df["Undernourished"] = undernourished_text.astype(float)
df.describe()

In [None]:
# The unit column is not needed for the analysis, so remove it.
# errors='ignore' keeps the cell re-runnable: once the column is gone a
# second run is a no-op instead of a KeyError.
df.drop(columns='Unit (all except Population)', inplace=True, errors='ignore')
df.describe()

In [None]:
# A look at the overall mix of food sources.
# Pick the per-column mean by its label instead of relying on it being the
# second row of describe().
diet_mean = df.describe().loc['mean']

# Remove the columns that are not food categories, then turn the Series into a
# one-column frame (the column is named 'mean' after the Series).
non_food_columns = ['Deaths', 'Population', 'Undernourished', 'Obesity', 'Recovered', 'Confirmed', 'Active']
diet_mean = diet_mean.drop(non_food_columns).to_frame()

# Keep the 11 categories with the largest mean value for the pie chart.
diet_mean = diet_mean.sort_values(by='mean', ascending=False).iloc[:11]
# NOTE(review): 'Animal Products' / 'Vegetal Products' look like aggregates of
# the other categories; if so they are double counted in this pie. Confirm.
diet_mean.plot.pie(subplots=True, figsize=(20, 20), autopct='%1.1f%%')

Let's analyse the correlation matrix to see which dietary factors are correlated with confirmed cases.

In [None]:
# Pairwise correlation of the numeric columns only. Restricting the frame
# first keeps corr() from failing if a non-numeric column is still present
# (for example when a cleaning cell above has not been run).
cor = df.select_dtypes(include="number").corr()

In [None]:
# Annotated heat map of the correlation matrix on a 20x20 inch figure.
fig, ax = plt.subplots(figsize=(20, 20))
sns.heatmap(cor, annot=True, cmap=plt.cm.Reds, ax=ax)
plt.show()

Let's take a look at features that are correlated with confirmed cases and do some plotting.

In [None]:
# Absolute correlation of every feature with the confirmed cases, leaving out
# the case-count columns themselves, strongest first.
outcome_columns = ["Confirmed", "Deaths", "Recovered", "Active"]
cor_target = cor["Confirmed"].abs().drop(outcome_columns).sort_values(ascending=False)

# Keep only the features whose absolute correlation exceeds 0.4.
is_relevant = cor_target > 0.4
relevant_features = cor_target[is_relevant]
relevant_features

In [None]:
# Largest value in the "Population" column.
df.loc[:, "Population"].max()

In [None]:
# Order the rows from the highest to the lowest obesity value.
df_Obesity = df.sort_values("Obesity", ascending=False)
df_Obesity.head(5)

I'm going to join some features from the World Happiness Report with this data.

In [None]:
# Line plot of confirmed and recovered cases against obesity, drawn from the
# obesity-sorted frame. The axes occupy the left half of a 40x7 inch figure.
fig = plt.figure(figsize=(40, 7))
ax = fig.add_subplot(1, 2, 1)

for outcome in ("Confirmed", "Recovered"):
    ax.plot(df_Obesity["Obesity"], df_Obesity[outcome], label=outcome)

ax.set_title('Obesity Vs Confirmed cases')
ax.set_xlabel('Percentage of Obesity')
ax.set_ylabel('Confirmed cases')
ax.legend(loc='best')

In [None]:
# Scatter of the three outcome columns against obesity, one series per outcome.
fig = plt.figure(figsize=(40, 7))
ax = fig.add_subplot(1, 2, 1)

for outcome in ("Confirmed", "Recovered", "Deaths"):
    ax.scatter(df["Obesity"], df[outcome], label=outcome)

ax.set_title('Obesity Vs Covid19')
ax.set_xlabel('Percentage of Obesity')
ax.set_ylabel('Covid19')
ax.legend(loc='best')

In [None]:
# Order the rows from the highest to the lowest undernourishment value.
df_Undernourished = df.sort_values("Undernourished", ascending=False)
df_Undernourished.head(10)

In [None]:
# The plots above suggest a correlation between obesity and the spread of Covid-19.
# Repeat the same kind of line plot for the "Undernourished" feature.
fig = plt.figure(figsize=(40, 7))
ax = fig.add_subplot(1, 2, 1)

# Series order is kept as Recovered, Confirmed, Deaths so the colours match
# the original figure.
for outcome in ("Recovered", "Confirmed", "Deaths"):
    ax.plot(df_Undernourished["Undernourished"], df_Undernourished[outcome], label=outcome)

ax.set_title('Undernourished Vs Confirmed cases')
ax.set_xlabel('Percentage of Undernourished people')
ax.set_ylabel('Confirmed cases')
ax.legend(loc='best')

In [None]:
#Contrary to popular belief, countries with a higher share of undernourished people seem to have fewer cases.
#There could be many factors behind this, such as the lower life expectancy associated with undernourishment.

In [None]:
# Compare the two nutrition indicators directly: obesity against undernourishment.
fig = plt.figure(figsize=(40, 7))
ax = fig.add_subplot(1, 2, 1)

ax.scatter(df_Undernourished["Undernourished"], df_Undernourished["Obesity"])

ax.set_title('Obesity Vs Undernourished')
ax.set_ylabel('Percentage of Obesity')
ax.set_xlabel('Undernourished')
# No legend here: there is a single unlabelled series, so calling legend()
# only produced a "no artists with labels" warning and an empty box.
plt.show()


In [None]:
#Looks like obesity is more prevalent in modern populations than undernourished populations

In [None]:
# Plot one of the relevant features: vegetal products against each Covid-19 outcome.
fig = plt.figure(figsize=(40, 7))
ax = fig.add_subplot(1, 2, 1)

for outcome in ("Confirmed", "Recovered", "Deaths"):
    ax.scatter(df["Vegetal Products"], df[outcome], label=outcome)

ax.set_title('Consumption of Vegetal Products Vs covid19')
ax.set_ylabel('Covid19')
ax.set_xlabel('Vegetal Products')
ax.legend(loc='best')


In [None]:
# Plot one of the relevant features: animal products against each Covid-19 outcome.
fig = plt.figure(figsize=(40, 7))
ax = fig.add_subplot(1, 2, 1)

for outcome in ("Confirmed", "Recovered", "Deaths"):
    ax.scatter(df["Animal Products"], df[outcome], label=outcome)

ax.set_title('Consumption of Animal Products Vs covid19')
ax.set_ylabel('Covid19')
ax.set_xlabel('Animal Products')
ax.legend(loc='best')

In [None]:
# Cereal consumption against each Covid-19 outcome.
fig = plt.figure(figsize=(40, 7))
ax = fig.add_subplot(1, 2, 1)

for outcome in ("Confirmed", "Recovered", "Deaths"):
    ax.scatter(df["Cereals - Excluding Beer"], df[outcome], label=outcome)

ax.set_title('Consumption of Cereals Vs covid19')
ax.set_ylabel('Covid19')
ax.set_xlabel('Consumption of Cereals')
ax.legend(loc='best')

In [None]:
# Milk consumption against each Covid-19 outcome.
fig = plt.figure(figsize=(40, 7))
ax = fig.add_subplot(1, 2, 1)

for outcome in ("Confirmed", "Recovered", "Deaths"):
    ax.scatter(df["Milk - Excluding Butter"], df[outcome], label=outcome)

ax.set_title('Consumption of Milk Vs covid19')
ax.set_ylabel('Covid19')
ax.set_xlabel('Consumption of Milk')
ax.legend(loc='best')

In [None]:
#I'm going to make a few more plots just out of curiosity.

In [None]:
# NOTE(review): this cell repeats the earlier cereals scatter plot unchanged.
# Either remove it or point it at the feature that was actually intended.
fig = plt.figure(figsize=(40, 7))
ax = fig.add_subplot(1, 2, 1)

for outcome in ("Confirmed", "Recovered", "Deaths"):
    ax.scatter(df["Cereals - Excluding Beer"], df[outcome], label=outcome)

ax.set_title('Consumption of Cereals Vs covid19')
ax.set_ylabel('Covid19')
ax.set_xlabel('Consumption of Cereals')
ax.legend(loc='best')

In [None]:

# Alcoholic-beverage consumption against each Covid-19 outcome.
fig = plt.figure(figsize=(40, 7))
ax = fig.add_subplot(1, 2, 1)

for outcome in ("Confirmed", "Recovered", "Deaths"):
    ax.scatter(df["Alcoholic Beverages"], df[outcome], label=outcome)

ax.set_title('Consumption of Alcoholic Beverages Vs covid19')
ax.set_ylabel('Covid19')
ax.set_xlabel('Consumption of Alcoholic Beverages')
ax.legend(loc='best')

In [None]:

# Sugar and sweetener consumption against each Covid-19 outcome.
fig = plt.figure(figsize=(40, 7))
ax = fig.add_subplot(1, 2, 1)

for outcome in ("Confirmed", "Recovered", "Deaths"):
    ax.scatter(df["Sugar & Sweeteners"], df[outcome], label=outcome)

ax.set_title('Consumption of Sugar & Sweeteners Vs covid19')
ax.set_ylabel('Covid19')
ax.set_xlabel('Consumption of Sugar & Sweeteners')
ax.legend(loc='best')

In [None]:
#From the plots above, it looks like diet has some association with the impact of the pandemic.
#Let's look at some other factors now.

In [None]:
# Load the 2019 World Happiness Report and discard the columns that are not
# used in this analysis.
unused_columns = ["Perceptions of corruption", "Generosity", "Freedom to make life choices"]
df_report = pd.read_csv('../input/world-happiness-report/2019.csv')
df_report.drop(columns=unused_columns, inplace=True)
df_report.head(10)

In [None]:
# (rows, columns) of the happiness report after the unused columns were dropped.
df_report.shape

In [None]:
# Index the happiness report by country name so it can be joined with the
# diet data on the index.
# The guard makes the cell safe to re-run: after the first run
# "Country or region" is no longer a column, and an unguarded set_index
# would raise KeyError.
if "Country or region" in df_report.columns:
    df_report.set_index("Country or region", inplace=True)

In [None]:
# Preview the first five rows of the re-indexed happiness report.
df_report.head(5)

In [None]:
# Take the Covid-19 outcome columns from the diet dataset; these are the only
# fields that get joined with the happiness report.
# (The stray `df.head()` that used to sit here was removed: its result was
# never displayed because it was not the last expression of the cell.)
diet_df = df[["Confirmed", "Recovered", "Deaths"]]
diet_df.head()

In [None]:
# Inner join of the two frames on their index: only index labels present in
# both frames are kept.
# NOTE(review): labels that are spelled differently in the two sources are
# dropped silently by an inner join; worth checking the row count below.
final_data = diet_df.join(other=df_report, how="inner")
final_data.head(5)

In [None]:
# (rows, columns) of the joined frame.
final_data.shape

In [None]:
# Order the joined frame in place by ascending "Overall rank" so the
# rank-based line plot is drawn left to right.
final_data.sort_values("Overall rank", ascending=True, inplace=True)
final_data.head(5)

In [None]:
# Line plot of each Covid-19 outcome against the happiness rank
# (final_data is already sorted by rank).
fig = plt.figure(figsize=(40, 7))
ax = fig.add_subplot(1, 2, 1)

for outcome in ("Confirmed", "Recovered", "Deaths"):
    ax.plot(final_data["Overall rank"], final_data[outcome], label=outcome)

ax.set_title('World happiness report rank Vs Covid19')
ax.set_ylabel('Covid19')
ax.set_xlabel('Overall rank')
ax.legend(loc='best')

In [None]:
# Healthy life expectancy against each Covid-19 outcome.
fig = plt.figure(figsize=(40, 7))
ax = fig.add_subplot(1, 2, 1)

for outcome in ("Confirmed", "Recovered", "Deaths"):
    ax.scatter(final_data["Healthy life expectancy"], final_data[outcome], label=outcome)

# The title used to be copy-pasted from the rank plot and the x-label was
# misspelled; both now name the column that is actually plotted.
ax.set_title('Healthy life expectancy Vs Covid19')
ax.set_ylabel('Covid19 cases')
ax.set_xlabel('Healthy life expectancy')
ax.legend(loc='best')

In [None]:
# GDP per capita against each Covid-19 outcome.
fig = plt.figure(figsize=(40, 7))
ax = fig.add_subplot(1, 2, 1)

for outcome in ("Confirmed", "Recovered", "Deaths"):
    ax.scatter(final_data["GDP per capita"], final_data[outcome], label=outcome)

ax.set_title('GDP per capita Vs Covid19')
ax.set_ylabel('Covid19 cases')
ax.set_xlabel('GDP per capita')
ax.legend(loc='best')

In [None]:
# Social support against each Covid-19 outcome.
fig = plt.figure(figsize=(40, 7))
ax = fig.add_subplot(1, 2, 1)

for outcome in ("Confirmed", "Recovered", "Deaths"):
    ax.scatter(final_data["Social support"], final_data[outcome], label=outcome)

ax.set_title('Social support Vs Covid19')
ax.set_ylabel('Covid19 cases')
ax.set_xlabel('Social support')
ax.legend(loc='best')