# UV radiation and COVID-19 deaths in *small* countries

The median UV radiation (CAMS **[uvbed](https://confluence.ecmwf.int/display/CUSF/CAMS+global+UV+index)** parameter) is calculated at a **single** geographical point for each country. This single-point value should be more representative of the UV radiation over the whole country when the country is "small".

**The lower tercile (33%) of countries by surface area is used in this notebook.**

## Inspiration

The article ["Predicted Inactivation of Viruses of Relevance to Biodefense by Solar Radiation"](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC1280232/) predicts that **63%** of positive-sense ssRNA viruses, such as SARS-CoV-2 (the virus that causes COVID-19), will be inactivated by a UV exposure of **25-31J/m2** at a wavelength of **254nm**. It states:

> Fortunately, the primary photochemical processes that damage the viral DNA or RNA occur at all the solar UV wavelengths, varying only in the efficiency of the different wavelengths

Although [CAMS uvbed](https://confluence.ecmwf.int/display/CUSF/CAMS+global+UV+index) measures something different from the raw solar UV radiation (it is the spectral solar irradiance in the wavelength range 280-340nm convolved with the erythema spectrum), it may still be a good indicator of the **virucidal effect** of solar radiation (if any).

## Main parameters


In [None]:
# Analysis parameters read by the cells below.
mind = 1           # min number of deaths or confirmed cases
numd = 20          # range of days for growth (after) and for average UV radiation (before)
data = "deaths"    # which time series to analyse: confirmed or deaths
percen = 33        # lower percentile of countries selected (by surface area)

## Combine COVID-19 deaths and UV radiation information

For each country:
* Calculate the growth of deaths in the next **"numd"** days after the first date with at least **"mind"** deaths
* Calculate the *median* daily maximum UV radiation in the previous **"numd"** days before the first date with at least **"mind"** deaths

In [None]:
import pandas as pd
from json import load
from numpy import nan
from datetime import timedelta

# Kaggle input directories of the three datasets read by this notebook.
ds1 = '/kaggle/input/uv-biologically-effective-dose-from-cams/'  # dataset 1: uvbed.csv and its lookup table
ds2 = '/kaggle/input/corona-virus-time-series-dataset/'          # dataset 2: CSSE COVID-19 tables
ds3 = '/kaggle/input/country-data/'                              # dataset 3: per-country JSON lists

# UID/ISO lookup table; the first CSV column becomes the index.
df = pd.read_csv(ds2 + "COVID-19/csse_covid_19_data/UID_ISO_FIPS_LookUp_Table.csv", header=0, index_col=0)

# Read the JSON lists inside `with` blocks so the file handles are closed
# deterministically instead of being left to the garbage collector.
with open(ds3 + "country-by-abbreviation.json", encoding="utf-8") as fh:
    isos = load(fh)
with open(ds3 + "country-by-surface-area.json", encoding="utf-8") as fh:
    areas = load(fh)

# abbreviation -> country name, and country name -> surface area.
iso_country = {entry["abbreviation"]: entry["country"] for entry in isos}
country_area = {entry["country"]: entry["area"] for entry in areas}

def iso_area(iso):
    """Return the surface area of the country whose abbreviation is *iso*.

    The abbreviation is translated to a country name through ``iso_country``
    and the name to an area through ``country_area``. ``nan`` is returned
    when either mapping has no entry for the key.
    """
    try:
        return country_area[iso_country[iso]]
    except KeyError:
        # Unknown abbreviation, or a country without a recorded area.
        return nan

# Add the surface area to the lookup table and save it locally; the saved
# file is read back later to attach Population and Area to each country.
df["Area"] = df["iso2"].apply(iso_area)
df.to_csv("UID_ISO_FIPS_LookUp_Table_small.csv")

# From here on `df` holds the UV records: parse the YYYYMMDD dates and join
# the UV lookup table on UID (this brings in the Combined_Key used below).
df = pd.read_csv(ds1 + "uvbed.csv", header=0)
df["date"] = pd.to_datetime(df.date, format="%Y%m%d")
dflu = pd.read_csv(ds1 + "LookUp_Table.csv", header=0)
df = df.merge(dflu, on="UID")

#deaths / confirmed  global
dfu = pd.read_csv(ds2 + "COVID-19/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_%s_global.csv" % data, header=0)
# Keep rows that have coordinates and no Province/State value. The explicit
# copy makes `dfu` an independent frame, so the column assignments that
# follow do not raise SettingWithCopyWarning.
dfu = dfu.loc[dfu.Lat.notna() & dfu.Long.notna() & dfu["Province/State"].isnull()].copy()

#Use "Country/Region" to join with "Combined_Key" column
# (appended as the last column of the frame)
dfu["Combined_Key"] = dfu["Country/Region"]

def calculate_growth(ser, mind, numd):
    """Return the growth factor and the outbreak date for one table row.

    Parameters
    ----------
    ser : pandas.Series
        One row of the time-series table. The first four entries and the
        last entry are discarded as non-date columns; if a "Population"
        label is still present, the first remaining entry is dropped too.
    mind : number
        Minimum count that marks the start of the outbreak.
    numd : int
        Number of qualifying columns over which the growth is measured.

    Returns
    -------
    tuple
        ``(growth, first_date)``. ``first_date`` is the label of the first
        column whose value is at least ``mind``; ``growth`` is the value
        ``numd - 1`` qualifying columns later divided by the value at
        ``first_date``. ``(0, "")`` is returned when fewer than ``numd``
        columns qualify.
    """
    ser = ser.tail(-4).head(-1)  # remove non date columns
    if "Population" in ser.index:
        ser = ser.tail(-1)

    # "At least `mind`": the comparison is inclusive so that it matches the
    # documented threshold (the previous strict `>` skipped the first day on
    # which the count was exactly `mind`).
    ser = ser.loc[ser >= mind]
    vals = ser.values

    # Not enough qualifying days, or a zero baseline that cannot be divided by.
    if len(vals) < numd or vals[0] == 0:
        return 0, ""
    return vals[numd - 1] / vals[0], ser.index[0]

# Per-country results, in the row order of `dfu`:
#   g = growth factor, d = outbreak date label, j = median uvbed value.
g, d, j = [], [], []
for i in dfu.index:
    gr, da = calculate_growth(dfu.loc[i], mind, numd)
    g.append(gr)
    d.append(da)
    if da == "":
        # calculate_growth found too few qualifying days: no UV value either.
        j.append(nan)
        continue
    # The date label is parsed with pandas' default inference; the
    # deprecated `infer_datetime_format` argument is no longer passed.
    da = pd.to_datetime(da)
    da0 = da - timedelta(numd)
    # UV records of this country from `numd` days before the outbreak date
    # up to and including the outbreak date itself.
    mask = (df.Combined_Key == dfu.Combined_Key[i]) & (df.date >= da0) & (df.date <= da)
    # Single .loc call (rows and column together) instead of chained indexing.
    uv = df.loc[mask, "uvbed[W/m2]"].median()
    j.append(uv)

dfu["growth"] = g
dfu["1st_date"] = d
dfu["uvbed[W/m2]"] = j

# Rows without a UV median (including those with too little data) are dropped.
mask = dfu.growth.notna() & dfu["uvbed[W/m2]"].notna()
dfu = dfu.loc[mask]

#Population
if "Population" not in dfu.columns:
    # Attach Population, Area and iso2 from the lookup table saved earlier.
    dfp = pd.read_csv("UID_ISO_FIPS_LookUp_Table_small.csv", index_col=0)
    dfp = dfp[["Population", "Area", "Combined_Key", "iso2"]]
    dfu = dfu.merge(dfp, on="Combined_Key")

mask = dfu.Population.notna() & dfu.Area.notna()
dfu = dfu.loc[mask]

#save only relevant columns
dfu = dfu[["iso2", "Combined_Key", "Lat", "Long", "Population", "Area", "1st_date", "growth", "uvbed[W/m2]"]]
dfu.to_csv("US_maxuv_growth.csv")
# NOTE(review): "1st_date" still holds the column-label strings here, so this
# preview is ordered lexicographically rather than chronologically.
dfu.sort_values(["1st_date"]).tail(20)

##  Countries which have reported deaths

In [None]:
from numpy import log
import matplotlib.pyplot as plt
import seaborn as sns

# Reload the table written by the previous cell and derive the plotted columns.
df = pd.read_csv("US_maxuv_growth.csv", header=0, index_col=0)
g = data + " growth"          # legend-friendly alias of the growth column
df[g] = df["growth"]
df["log10(growth)"] = log(df.growth) / log(10)

# Map of all countries: colour encodes the UV value, marker size the growth.
sns.set()
plt.figure(figsize=(18, 12))
cmap = sns.diverging_palette(220, 20, as_cmap=True)
p = sns.scatterplot(x="Long", y="Lat", hue="uvbed[W/m2]", size=g, data=df, palette=cmap)
# Label every marker with its lowercase country key.
for lon, lat, name in zip(df.Long, df.Lat, df.Combined_Key):
    p.text(lon, lat, name.lower(), size='small', horizontalalignment='left')
plt.show()

## The lower tercile of "small" countries

In [None]:
#keep only countries below the `percen`-th percentile of surface area
from numpy import percentile
# Use the `percen` parameter from the parameters cell instead of a
# hard-coded 33, so changing the parameter actually changes the selection.
threshold = percentile(df.Area.values, percen)
mask = df.Area < threshold
df = df.loc[mask]

# Same map as before, restricted to the selected countries.
plt.figure(figsize=(18, 12))
p = sns.scatterplot(hue="uvbed[W/m2]", size=g, y="Lat", x="Long", data=df, palette=cmap)
for i in df.index:
    p.text(df.Long[i], df.Lat[i], df.Combined_Key[i].lower(), size='small', horizontalalignment='left')
plt.show()
df.sort_values(["1st_date"])

## Deaths-growth against country "outbreak" date 

In [None]:
from datetime import datetime
from matplotlib.ticker import FuncFormatter
from numpy import percentile

X = "first date with %d %s or more" % (mind, data)
Y = "log10(growth)"
Z = "uvbed[W/m2]"

# Express the outbreak date as a POSIX timestamp so it can be used as a
# numeric axis for the scatter and density plots.
df["1st_date"] = pd.to_datetime(df["1st_date"])
df[X] = df["1st_date"].apply(datetime.timestamp)

plt.figure(figsize=(18, 12))
cmap = sns.diverging_palette(220, 20, as_cmap=True)
p = sns.scatterplot(y=Y, x=X, hue=Z, size=Z, data=df, palette=cmap)
for i in df.index:
    p.text(df[X][i], df[Y][i], df.Combined_Key[i].lower(), horizontalalignment='left', size='small', color='black')

# Format ticks as dates through a formatter instead of freezing label strings
# with set_xticklabels: the density layers drawn below can move the automatic
# tick locations, and fixed labels would then no longer match their ticks.
p.xaxis.set_major_formatter(
    FuncFormatter(lambda ts, pos: datetime.fromtimestamp(ts).strftime('%Y-%m-%d'))
)

# Split the countries into the lower and upper UV terciles.
upper_thres = percentile(df[Z], 67)
lower_thres = percentile(df[Z], 33)
lower_uv = df.loc[df[Z] < lower_thres]
upper_uv = df.loc[df[Z] > upper_thres]

# Draw the two density plots
# NOTE(review): positional data vectors and shade/shade_lowest belong to the
# older seaborn kdeplot API; confirm against the installed seaborn version.
sns.kdeplot(upper_uv[X], upper_uv[Y], cmap="Reds", shade=True, shade_lowest=True, alpha=0.3)
sns.kdeplot(lower_uv[X], lower_uv[Y], cmap="Blues", shade=True, shade_lowest=True, alpha=0.3)
plt.show()

## Regression plot of deaths-growth against UV-radiation (uvbed)

In [None]:
# Regression plot of log10(growth) against the UV value, with every point
# labelled by its lowercase country key.
X, Y = "uvbed[W/m2]", "log10(growth)"
plt.figure(figsize=(18, 12))
p = sns.regplot(x=X, y=Y, data=df)
for x_val, y_val, name in zip(df[X], df[Y], df.Combined_Key):
    p.text(x_val, y_val, name.lower(), size='small', horizontalalignment='left')
plt.show()

## Regression plots of deaths-growth against other columns: date, population, area, density, latitude and longitude

In [None]:
from datetime import datetime
from matplotlib.ticker import FuncFormatter

# Outbreak date as a POSIX timestamp, so it can serve as a regression variable.
TS = "first date with %d %s or more" % (mind, data)
df["1st_date"] = pd.to_datetime(df["1st_date"])
df[TS] = df["1st_date"].apply(datetime.timestamp)

fig, axes = plt.subplots(3, 2, figsize=(24, 16))

# Panel [0, 0]: growth against the outbreak date.
p = sns.regplot(y=Y, x=TS, data=df, ax=axes[0, 0])
for i in df.index:
    p.text(df[TS][i], df[Y][i], df.Combined_Key[i].lower(), horizontalalignment='left', size='small', color='black')
# A formatter keeps the date labels tied to the actual tick locations;
# set_xticklabels alone fixes the strings but not the positions.
p.xaxis.set_major_formatter(
    FuncFormatter(lambda ts, pos: datetime.fromtimestamp(ts).strftime('%Y-%m-%d'))
)

# Derived explanatory columns.
df["log10(Population)"] = log(df.Population) / log(10)
df["log10(Area)"] = log(df.Area) / log(10)
df["Density"] = df.Population / df.Area
df["log10(Density)"] = log(df.Density) / log(10)

# Remaining panels, filled row by row starting at position 1 (axes[0, 1]).
for a, X in enumerate(["log10(Population)", "log10(Area)", "log10(Density)", "Lat", "Long"], start=1):
    p = sns.regplot(y=Y, x=X, data=df, ax=axes[a // 2, a % 2])
    for i in df.index:
        p.text(df[X][i], df[Y][i], df.Combined_Key[i].lower(), size='small', horizontalalignment='left')
plt.show()

## Statistical tests

In [None]:
from scipy.stats import kendalltau,pearsonr

# Correlate each candidate column with the raw growth factor, once per test,
# and print the coefficient together with its p-value.
columns = ["uvbed[W/m2]", "Population", "Area", "Density", "Lat", "Long", TS]
growth = df.growth.values
for test in (kendalltau, pearsonr):
    print()
    print("%s TEST" % test.__name__.upper())
    for column in columns:
        coef, p_value = test(df[column].values, growth)
        print("%40s: cor-coef=%6.3f p-value=%.10f" % (column, coef, p_value))