In [9]:
import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats as st
import numpy as np


In [10]:
# Read the state-level COVID-19 export. The path is relative, so the CSV must
# sit in the notebook's working directory.
Adata = pd.read_csv(filepath_or_buffer="USA.csv")
# Show the first five rows as a quick check that the load worked.
Adata.head(n=5)

Unnamed: 0,date,state,positive,probableCases,negative,pending,totalTestResultsSource,totalTestResults,hospitalizedCurrently,hospitalizedCumulative,...,posNeg,deathIncrease,hospitalizedIncrease,hash,commercialScore,negativeRegularScore,negativeScore,positiveScore,score,grade
0,20201206,AK,35720.0,,1042056.0,,totalTestsViral,1077776.0,164.0,799.0,...,1077776,0,0,7b1d31e2756687bb9259b29195f1db6cdb321ea6,0,0,0,0,0,
1,20201206,AL,269877.0,45962.0,1421126.0,,totalTestsPeopleViral,1645041.0,1927.0,26331.0,...,1691003,12,0,19454ed8fe28fc0a7948fc0771b2f3c846c1c92e,0,0,0,0,0,
2,20201206,AR,170924.0,22753.0,1614979.0,,totalTestsViral,1763150.0,1076.0,9401.0,...,1785903,40,21,25fc83bffff5b32ba1a737be8e087fad9f4fde33,0,0,0,0,0,
3,20201206,AS,0.0,,2140.0,,totalTestsViral,2140.0,,,...,2140,0,0,8c39eec317586b0c34fc2903e6a3891ecb00469e,0,0,0,0,0,
4,20201206,AZ,364276.0,12590.0,2018813.0,,totalTestsPeopleViral,2370499.0,2977.0,28248.0,...,2383089,25,242,7cf59da9e4bc31d905e179211313d08879880a85,0,0,0,0,0,


In [11]:
# List every column label in the loaded frame to see which fields are available.
Adata.columns

Index(['date', 'state', 'positive', 'probableCases', 'negative', 'pending',
       'totalTestResultsSource', 'totalTestResults', 'hospitalizedCurrently',
       'hospitalizedCumulative', 'inIcuCurrently', 'inIcuCumulative',
       'onVentilatorCurrently', 'onVentilatorCumulative', 'recovered',
       'dataQualityGrade', 'lastUpdateEt', 'dateModified', 'checkTimeEt',
       'death', 'hospitalized', 'dateChecked', 'totalTestsViral',
       'positiveTestsViral', 'negativeTestsViral', 'positiveCasesViral',
       'deathConfirmed', 'deathProbable', 'totalTestEncountersViral',
       'totalTestsPeopleViral', 'totalTestsAntibody', 'positiveTestsAntibody',
       'negativeTestsAntibody', 'totalTestsPeopleAntibody',
       'positiveTestsPeopleAntibody', 'negativeTestsPeopleAntibody',
       'totalTestsPeopleAntigen', 'positiveTestsPeopleAntigen',
       'totalTestsAntigen', 'positiveTestsAntigen', 'fips', 'positiveIncrease',
       'negativeIncrease', 'total', 'totalTestResultsIncrease', 'posNe

In [12]:
# Narrow the raw frame to the state label plus the five metrics that get
# summarised per state, in the order they should appear.
kept_columns = [
    "state",
    "totalTestResults",
    "death",
    "recovered",
    "positive",
    "hospitalized",
]
filled = Adata[kept_columns]
filled.head(n=5)

Unnamed: 0,state,totalTestResults,death,recovered,positive,hospitalized
0,AK,1077776.0,143.0,7165.0,35720.0,799.0
1,AL,1645041.0,3889.0,168387.0,269877.0,26331.0
2,AR,1763150.0,2660.0,149490.0,170924.0,9401.0
3,AS,2140.0,0.0,,0.0,
4,AZ,2370499.0,6950.0,56382.0,364276.0,28248.0


In [13]:
# Per-state mean of each COVID-19 metric; the result has one row per state.
# NOTE(review): the input appears to hold one row per state per date, so these
# are averages across dates rather than latest totals -- confirm that is intended.

# Group once and reuse the result instead of re-grouping for every metric.
state_means = filled.groupby("state")[
    ["positive", "death", "totalTestResults", "hospitalized", "recovered"]
].mean()

# Keep the individual per-metric Series available under their original names.
Covid_positive = state_means["positive"]
Death = state_means["death"]
Total_Covid_Test = state_means["totalTestResults"]
Hospitalized = state_means["hospitalized"]
Covid_recovered = state_means["recovered"]

B_Analysis = pd.DataFrame({
    "Covid_positive": Covid_positive,
    "Hospitalized": Hospitalized,
    "Total_Covid_Test": Total_Covid_Test,
    "Death": Death,
    "Covid_recovered": Covid_recovered,
})

# "state" is the index of this frame (it was the groupby key), so order by index.
B_Analysis = B_Analysis.sort_index(ascending=True)

# Render every metric as a whole-number string. A missing mean becomes the
# literal text "nan", and every column ends up with object dtype.
for column in B_Analysis.columns:
    B_Analysis[column] = B_Analysis[column].astype(float).map("{:.0f}".format)
B_Analysis.head(5)

Unnamed: 0_level_0,Covid_positive,Hospitalized,Total_Covid_Test,Death,Covid_recovered
state,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AK,6251.92,203.46,285110.37,34.49,2563.92
AL,88300.94,10618.43,659076.58,1561.85,53931.47
AR,48630.71,3446.17,578834.51,791.12,45153.96
AS,0.0,,939.06,0.0,
AZ,129116.25,11564.79,853265.61,3165.33,23455.22


In [14]:
# The summary values were formatted into strings, so a missing mean is the
# literal text 'nan' rather than a real NaN; swap that text for '0'.
Nan_filled = B_Analysis.replace(to_replace='nan', value='0')
Nan_filled.head(n=5)

Unnamed: 0_level_0,Covid_positive,Hospitalized,Total_Covid_Test,Death,Covid_recovered
state,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AK,6251.92,203.46,285110.37,34.49,2563.92
AL,88300.94,10618.43,659076.58,1561.85,53931.47
AR,48630.71,3446.17,578834.51,791.12,45153.96
AS,0.0,0.0,939.06,0.0,0.0
AZ,129116.25,11564.79,853265.61,3165.33,23455.22


In [15]:
# Check the dtype of each column. They are all `object` because the summary
# values were formatted into strings before the 'nan' replacement.
Nan_filled.dtypes

Covid_positive      object
Hospitalized        object
Total_Covid_Test    object
Death               object
Covid_recovered     object
dtype: object

In [16]:
# Write the nan-filled per-state summary to USA_cleaned.csv (relative path);
# the "state" index is written as the first column.
Nan_filled.to_csv('USA_cleaned.csv')