In [None]:
import numpy as np
import pandas as pd 
from matplotlib import pyplot as plt
import seaborn as sns

In [None]:
# Read the global confirmed-cases time series CSV into a DataFrame and display it.
confirmedCsvPath = '../input/covid19/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv'
timeSeriesConfirmed = pd.read_csv(confirmedCsvPath)
timeSeriesConfirmed

# Attribute Alterations

In [None]:
# Drop the coordinate columns, then collapse the frame to one row per
# 'Country/Region' by summing the remaining columns within each group.
timeSeriesConfirmed = timeSeriesConfirmed.drop(columns = ['Lat', 'Long'])
# numeric_only=True keeps any text column out of the aggregation. Later cells
# assume the layout is exactly ['Country', <one column per date>...]; pandas >= 2.0
# no longer drops non-numeric columns from groupby().sum() on its own.
# NOTE(review): the CSV is expected to carry a text 'Province/State' column - confirm.
timeSeriesConfirmed = timeSeriesConfirmed.groupby('Country/Region').sum(numeric_only = True).reset_index()
timeSeriesConfirmed = timeSeriesConfirmed.rename(columns = {'Country/Region':'Country'})
timeSeriesConfirmed

# Calculation of daily new cases

In [None]:
# Build timeSeriesDailyNew: same layout as timeSeriesConfirmed, but every date
# column except the first holds the difference to the previous date column
# (the notebook treats these differences as daily new cases).
timeSeriesDailyNew = timeSeriesConfirmed.copy()

# Every column other than 'Country' is treated as one date's count.
dateColumns = [col for col in timeSeriesDailyNew.columns if col != 'Country']
for col in dateColumns:
    timeSeriesDailyNew[col] = pd.to_numeric(timeSeriesDailyNew[col])

# One vectorised difference over all date columns at once. Working from an
# untouched copy of the counts removes the need to walk the columns from last
# to first so that each subtraction still sees the original previous column.
cumulativeCounts = timeSeriesDailyNew[dateColumns].values
dailyNewCounts = cumulativeCounts.copy()
# The first date column has no predecessor and keeps its original value.
dailyNewCounts[:, 1:] = cumulativeCounts[:, 1:] - cumulativeCounts[:, :-1]
timeSeriesDailyNew[dateColumns] = dailyNewCounts

timeSeriesDailyNew

# Plotting daily new cases for the US

In [None]:
# Line plot of the US row of timeSeriesDailyNew, skipping the leading 'Country' entry.
plt.figure(figsize=(30,10))
usDailyNew = timeSeriesDailyNew[timeSeriesDailyNew.Country == 'US'].iloc[0][1:]
usDailyNew.plot(kind = 'line')

# 14-day interval plot

In [None]:
# Plot 14 consecutive US values (row positions 56..69) against day numbers 1..14.
plt.figure(figsize=(30,10))
usRow = timeSeriesDailyNew[timeSeriesDailyNew.Country == 'US'].iloc[0]
intervalDayNumbers = list(np.linspace(1,14,14))
intervalCases = list(usRow[56:70].values)
plt.plot(intervalDayNumbers, intervalCases)

# Fitting a quadratic curve to that 14-day interval

In [None]:
# Fit a degree-2 polynomial to the same 14 US values (row positions 56..69)
# and draw the fitted curve (red dashed) over the observed values.
plt.figure(figsize=(30,10))
fitDayNumbers = list(np.linspace(1,14,14))
fitCases = list(timeSeriesDailyNew[timeSeriesDailyNew.Country == 'US'].iloc[0][56:70].values)
poly = np.polyfit(fitDayNumbers, fitCases, 2)
# Dense x grid so the fitted parabola is drawn smoothly.
xp = np.linspace(1,14,200)
plt.plot(fitDayNumbers, fitCases)
plt.plot(xp, np.polyval(poly,xp), 'r--')

**Functions to generate polynomial fits for n-day intervals**

In [None]:
def toPolyRegression(country, nDays):
    """Fit a quadratic to each consecutive, complete nDays-long window of a country's series.

    Reads the module-level ``timeSeriesDailyNew`` frame; the first entry of the
    country's row ('Country') is skipped and the rest is cut into
    non-overlapping windows of ``nDays`` values.

    For the window starting at day offset ``s`` the x-coordinates are ``nDays``
    points spread evenly over ``[s, s + nDays]`` (both ends included), so the
    fitted coefficients are expressed on the global day axis.

    Parameters
    ----------
    country : value compared against the 'Country' column.
    nDays : int, window length.

    Returns
    -------
    list of ``(s, coefficients)`` tuples, where ``coefficients`` is the array
    returned by ``np.polyfit(..., 2)`` (highest power first). A trailing window
    with fewer than ``nDays`` values is ignored. An unknown country or a
    non-positive ``nDays`` yields an empty list.

    Errors raised by ``np.polyfit`` on a complete window now propagate; the
    previous bare ``except`` silently truncated the result instead.
    """
    countryRows = timeSeriesDailyNew[timeSeriesDailyNew.Country == country]
    if len(countryRows) == 0 or nDays <= 0:
        return []

    # Position 0 of the row is the country name; everything after is data.
    dailyValues = countryRows.iloc[0][1:].values

    daysIntervalnDays = []
    # Stop before any window that would run past the end of the data.
    for start in range(0, len(dailyValues) - nDays + 1, nDays):
        xs = list(np.linspace(start, start + nDays, nDays))
        ys = list(dailyValues[start : start + nDays])
        daysIntervalnDays.append((start, np.polyfit(xs, ys, 2)))
    return daysIntervalnDays

def toPolyRegressionAbsolute(country, nDays):
    """Fit a quadratic to each complete nDays-long window, using window-local x values.

    Reads the module-level ``timeSeriesDailyNew`` frame; the first entry of the
    country's row ('Country') is skipped and the rest is cut into
    non-overlapping windows of ``nDays`` values.

    Every window is fitted against the same x-coordinates ``1, 2, ..., nDays``,
    so coefficients from different windows are directly comparable.

    Parameters
    ----------
    country : value compared against the 'Country' column.
    nDays : int, window length.

    Returns
    -------
    list of ``(s, coefficients)`` tuples, where ``s`` is the day offset at which
    the window starts and ``coefficients`` is the array returned by
    ``np.polyfit(..., 2)`` (highest power first). A trailing window with fewer
    than ``nDays`` values is ignored. An unknown country or a non-positive
    ``nDays`` yields an empty list.

    Errors raised by ``np.polyfit`` on a complete window now propagate; the
    previous bare ``except`` silently truncated the result instead.
    """
    countryRows = timeSeriesDailyNew[timeSeriesDailyNew.Country == country]
    if len(countryRows) == 0 or nDays <= 0:
        return []

    # Position 0 of the row is the country name; everything after is data.
    dailyValues = countryRows.iloc[0][1:].values
    # The x-axis is identical for every window, so build it once.
    xs = list(np.linspace(1, nDays, nDays))

    daysIntervalnDays = []
    # Stop before any window that would run past the end of the data.
    for start in range(0, len(dailyValues) - nDays + 1, nDays):
        ys = list(dailyValues[start : start + nDays])
        daysIntervalnDays.append((start, np.polyfit(xs, ys, 2)))
    return daysIntervalnDays

# Plotting combined polynomial fits in 14 day intervals 

In [None]:
# Overlay the per-window quadratic fits (red) on the US series (blue dotted).
plt.figure(figsize=(30,10))
country = 'US'
polyUS = toPolyRegression(country, nDays = 14)
timeSeriesCountry = timeSeriesDailyNew[timeSeriesDailyNew.Country == country]
countryValues = timeSeriesCountry.iloc[0][1:].values
totalDays = len(countryValues)
# Explicit x-axis 0..totalDays-1. Passing (totalDays, totalDays, values, 'b:')
# made matplotlib draw an extra single-point series at (totalDays, totalDays)
# before plotting the values against their implicit index.
plt.plot(np.arange(totalDays), countryValues, 'b:')

for day, poly in polyUS:
    # Each fit is expressed on the global day axis over [day, day + 14].
    xp = np.linspace(day, day+14 ,200)
    plt.plot(xp, np.polyval(poly, xp), 'r-')

In [None]:
def polyRegressionToAB(poly) :
    """Return the first two coefficients of every fitted curve as a list of 2-item lists.

    ``poly`` is an iterable of ``(day, curve)`` pairs; the ``day`` part is
    ignored and only ``curve[:2]`` is kept, in the original order.
    """
    return [list(coefficients[:2]) for _day, coefficients in poly]

def getCoeffAccum():
    """Collect the leading two quadratic coefficients of every 14-day window of every country.

    Iterates over the 'Country' column of the module-level ``timeSeriesDailyNew``
    frame in order and concatenates the per-country results of
    ``polyRegressionToAB(toPolyRegressionAbsolute(country, 14))`` into one flat list.
    """
    accumulated = []
    for countryName in timeSeriesDailyNew['Country']:
        windowFits = toPolyRegressionAbsolute(countryName, 14)
        accumulated.extend(polyRegressionToAB(windowFits))
    return accumulated

In [None]:
# Scatter plot of the (first, second) quadratic coefficient pairs of all windows.
plt.figure(figsize=(15,5))
plt.xlim([-500, 500])
plt.ylim([-10000, 10000])
# getCoeffAccum() refits every window of every country, so call it once and
# split the pairs afterwards instead of recomputing it per axis.
coeffPairs = getCoeffAccum()
x = [pair[0] for pair in coeffPairs]
y = [pair[1] for pair in coeffPairs]
plt.scatter(x,y, marker = '.')

In [None]:
from sklearn.cluster import AffinityPropagation

# Cluster the coefficient pairs of all windows with AffinityPropagation.
coeffMatrix = np.array(getCoeffAccum())
affinityModel = AffinityPropagation(verbose = True, damping = 0.9, max_iter = 1000, random_state = 0)
# fit() returns the estimator itself, so `clustering` is the fitted model.
clustering = affinityModel.fit(coeffMatrix)
# Labels start at 0, so the largest label plus one is reported as the cluster count.
print('Number of clusters Formed: ', max(clustering.labels_ + 1))

In [None]:
def getClusterLabelsByCountry(counrty):
    """Return the cluster label of each 14-day window of the given country.

    The country's windows are fitted with ``toPolyRegressionAbsolute(..., 14)``,
    reduced to their first two coefficients with ``polyRegressionToAB`` and
    passed to ``clustering.predict``; the result is whatever ``predict`` returns
    (one label per window).

    The parameter name ``counrty`` (sic) is kept unchanged so existing keyword
    callers keep working. The body now actually uses it: it previously read the
    module-level ``country`` variable, so the argument was ignored and the
    result depended on whatever ``country`` happened to hold.
    """
    windowFits = toPolyRegressionAbsolute(counrty, 14)
    return clustering.predict(polyRegressionToAB(windowFits))

getClusterLabelsByCountry('US')

In [None]:
# Per-country window labels: the full sequence for training, and windows
# 15..24 (slice [15:25]) of the same sequence for testing.
countries = list(timeSeriesDailyNew['Country'])
trainingClusterLabels = {}
testingClusterLabels = {}

for country in countries:
    # Labelling refits every window of the country, so do it once and slice
    # the result instead of calling getClusterLabelsByCountry twice.
    countryLabels = getClusterLabelsByCountry(country)
    trainingClusterLabels[country] = countryLabels
    testingClusterLabels[country] = countryLabels[15:25]

testingClusterLabels

In [None]:
def getIndexNGram(ng, labels):
    """Return the start positions at which ``ng`` occurs in ``labels`` with an element after it.

    Only start positions ``0 .. len(labels) - len(ng) - 1`` are examined, so an
    occurrence that ends exactly at the end of ``labels`` is not reported; every
    returned index ``i`` therefore satisfies ``i + len(ng) < len(labels)``.
    An empty ``ng`` matches at every examined position.
    """
    width = len(ng)
    return [
        start
        for start in range(len(labels) - width)
        if all(ng[offset] == labels[start + offset] for offset in range(width))
    ]


# getIndexNGram([8,16], [ 70,  8,  16, 224, 163, 163,   8,  16,   2,   8])

def findProbab(ipNGram, trainingSet):
    """Return, in percent, how often each label follows ``ipNGram`` in the training sequences.

    ``trainingSet`` maps a key (the country) to a label sequence. Every
    occurrence of ``ipNGram`` found by ``getIndexNGram`` contributes the label
    that comes right after it. The result maps each such follower label to
    ``count * 100 / total``; it is an empty dict when the n-gram never occurs.
    Keys appear in the order in which the follower labels were first seen.
    """
    width = len(ipNGram)
    followerCounts = {}
    for labels in trainingSet.values():
        for start in getIndexNGram(ipNGram, labels):
            follower = labels[start + width]
            followerCounts[follower] = followerCounts.get(follower, 0) + 1

    total = sum(followerCounts.values())
    # No division happens when followerCounts is empty, so total == 0 is safe.
    return {label: count * 100 / total for label, count in followerCounts.items()}

In [None]:
# Rebuild the label dictionaries; the testing slice is windows
# limits[0] .. limits[1]-1 of each country's label sequence.
countries = list(timeSeriesDailyNew['Country'])
trainingClusterLabels = {}
testingClusterLabels = {}
limits = (20,25)
for country in countries:
    # Labelling refits every window of the country, so do it once and slice
    # the result instead of calling getClusterLabelsByCountry twice.
    countryLabels = getClusterLabelsByCountry(country)
    trainingClusterLabels[country] = countryLabels
    testingClusterLabels[country] = countryLabels[limits[0]:limits[1]]

In [None]:
def plotPrediction(country, testingSet, trainingSet, nDays):
    """Plot a country's observed test windows and a predicted curve for the next window.

    Uses the module-level ``limits`` (first and one-past-last window index),
    ``clustering`` and ``timeSeriesDailyNew``.

    Steps:
      1. Take the last two labels of ``testingSet[country]`` and look up, via
         ``findProbab``, which label most often follows that pair in ``trainingSet``.
      2. Use that label's cluster centre as the two leading coefficients of a
         quadratic whose constant term is the last observed value before the
         predicted window.
      3. Draw the observed values of windows ``limits[0] .. limits[1]-1`` (blue)
         and the quadratic evaluated at x = 1..nDays, placed on the following
         ``nDays`` days (red dotted).

    Parameters
    ----------
    country : key into ``testingSet`` and value matched against the 'Country' column.
    testingSet, trainingSet : dicts mapping country to a label sequence.
    nDays : int, window length in days.

    Raises
    ------
    ValueError
        If the label pair never occurs with a successor in ``trainingSet``, so
        there is nothing to predict from.
    """
    firstWindow, lastWindow = limits
    probabilityTable = findProbab(testingSet[country][-2:], trainingSet)
    if not probabilityTable:
        # max() on an empty dict raises ValueError too, but without context.
        raise ValueError('No training sequence contains the last two labels of %r followed by another label' % (country,))
    maxProbabilityLabel = max(probabilityTable, key = probabilityTable.get)
    center = clustering.cluster_centers_[maxProbabilityLabel]

    countryRow = timeSeriesDailyNew[timeSeriesDailyNew.Country == country].iloc[0]
    startDay = firstWindow*nDays
    endDay = lastWindow*nDays

    # Row position 0 holds the country name, so day d lives at position d + 1
    # (the same offset the window fits use). Use .iloc throughout: integer
    # keys via [] on a label-indexed Series are deprecated positional access.
    observed = list(countryRow.iloc[1 + startDay : 1 + endDay].values)
    plt.plot(list(np.linspace(startDay, endDay, endDay - startDay)), observed, 'b-')

    # Position endDay is day endDay - 1: the last observed value before the
    # predicted window. It acts as the quadratic's value at x = 0.
    C = countryRow.iloc[endDay]
    poly = np.array([center[0], center[1], C])
    # The cluster centres come from fits on x = 1..nDays, so evaluate there
    # and shift the curve onto the days of the predicted window.
    xp = np.linspace(1,nDays,100)
    plt.plot(np.linspace(endDay, endDay + nDays, 100), np.polyval(poly, xp), 'r:')

In [None]:
# Observed test windows plus the predicted next 14-day curve for the US.
plotPrediction('US', testingClusterLabels, trainingClusterLabels, 14)

In [None]:
# Observed test windows plus the predicted next 14-day curve for India.
plotPrediction('India', testingClusterLabels, trainingClusterLabels, 14)

In [None]:
# Observed test windows plus the predicted next 14-day curve for France.
plotPrediction('France', testingClusterLabels, trainingClusterLabels, 14)

In [None]:
#---------------------anubhav----------------------------
# Zimbabwe
# Percentage of times each label follows the label pair [63, 143] across all
# training sequences.
# NOTE(review): presumably 63 and 143 are Zimbabwe's last two window labels - confirm.
findProbab([63,143], trainingClusterLabels)

In [None]:
# Keep a handle on the fitted model's cluster centres.
# NOTE(review): the name `dict` shadows the Python built-in of the same name for
# the rest of the session. Later cells read this variable, so a rename has to be
# applied to all of them together.
dict = clustering.cluster_centers_

In [None]:
# Quadratic fits for each 14-day window of Zimbabwe's series, with x measured
# within each window; displayed for inspection.
Zimbabwe=toPolyRegressionAbsolute('Zimbabwe', 14)
Zimbabwe

In [None]:
# Number of cluster centres held by the fitted model.
len(clustering.cluster_centers_)

In [None]:
# Display the cluster centres of the fitted model.
clustering.cluster_centers_

In [None]:
#dict[147,1]
# Coefficients [a, b, c] (highest power first) for np.polyval: a and b are taken
# from row 146 of the cluster centres stored in `dict`, c is a hard-coded constant.
# NOTE(review): 146 and 29.64285714 are unexplained magic numbers - presumably the
# predicted cluster label and the last observed value for Zimbabwe; confirm.
eq=[dict[146,0],dict[146,1],29.64285714]

In [None]:
# Draw the quadratic `eq`, evaluated at x = 0..13, on days 406..419.
windowOffsets = np.arange(14)
plt.plot(windowOffsets + 406, np.polyval(eq, windowOffsets), 'r-')

In [None]:
# Overlay the per-window quadratic fits (red) on the US series (blue dotted),
# then append the curve described by `eq` on days 420..433.
plt.figure(figsize=(30,10))
country = 'US'
polyUS = toPolyRegression(country, nDays = 14)
timeSeriesCountry = timeSeriesDailyNew[timeSeriesDailyNew.Country == country]
countryValues = timeSeriesCountry.iloc[0][1:].values
totalDays = len(countryValues)
# Explicit x-axis 0..totalDays-1. Passing (totalDays, totalDays, values, 'b:')
# made matplotlib draw an extra single-point series at (totalDays, totalDays)
# before plotting the values against their implicit index.
plt.plot(np.arange(totalDays), countryValues, 'b:')

for day, poly in polyUS:
    # Each fit is expressed on the global day axis over [day, day + 14].
    xp = np.linspace(day, day+14 ,200)
    plt.plot(xp, np.polyval(poly, xp), 'r-')
# `eq` is evaluated at x = 0..13 and shifted to start at day 420.
plt.plot(np.arange(14)+420, np.polyval(eq, np.arange(14)), 'r-')