In [36]:
import csv
import pandas as pd
import numpy as np


## 1. Preprocessing
### Import csv file

In [56]:
covid_df = pd.read_csv("data/Cleaned-Data.csv")

In [57]:
print(covid_df.head())
# run this if you want to run this very quickly
# covid_df = covid_df.head(100)

   Fever  Tiredness  Dry-Cough  Difficulty-in-Breathing  Sore-Throat  \
0      1          1          1                        1            1   
1      1          1          1                        1            1   
2      1          1          1                        1            1   
3      1          1          1                        1            1   
4      1          1          1                        1            1   

   None_Sympton  Pains  Nasal-Congestion  Runny-Nose  Diarrhea  ...  \
0             0      1                 1           1         1  ...   
1             0      1                 1           1         1  ...   
2             0      1                 1           1         1  ...   
3             0      1                 1           1         1  ...   
4             0      1                 1           1         1  ...   

   Gender_Male  Gender_Transgender  Severity_Mild  Severity_Moderate  \
0            1                   0              1                  0

In [58]:
symptoms = ['Fever','Tiredness', 'Dry-Cough', 'Difficulty-in-Breathing',  
            'Sore-Throat', 'Pains',  'Nasal-Congestion',  'Runny-Nose',  'Diarrhea' ]
print(len(symptoms))

9


In [59]:
covid_df["None_Experiencing"].value_counts()

0    288000
1     28800
Name: None_Experiencing, dtype: int64

- what is none-symptoms?
- what is none-experiencing

### Omit irrelevant columns

In [60]:
#covid_df.drop(covid_df.index[covid_df['None_Experiencing'] == 1], inplace=True)

In [61]:
covid_df.drop(['None_Experiencing', 'None_Sympton', 'Gender_Male', 'Gender_Transgender', 'Contact_Dont-Know', 'Contact_No','Contact_Yes', 'Age_0-9', 'Age_10-19','Age_20-24' ,'Age_25-59', 'Age_60+','Gender_Female','Country'  ], axis=1, inplace=True)


In [62]:
covid_df = covid_df.astype(float)
print(covid_df.head())


   Fever  Tiredness  Dry-Cough  Difficulty-in-Breathing  Sore-Throat  Pains  \
0    1.0        1.0        1.0                      1.0          1.0    1.0   
1    1.0        1.0        1.0                      1.0          1.0    1.0   
2    1.0        1.0        1.0                      1.0          1.0    1.0   
3    1.0        1.0        1.0                      1.0          1.0    1.0   
4    1.0        1.0        1.0                      1.0          1.0    1.0   

   Nasal-Congestion  Runny-Nose  Diarrhea  Severity_Mild  Severity_Moderate  \
0               1.0         1.0       1.0            1.0                0.0   
1               1.0         1.0       1.0            1.0                0.0   
2               1.0         1.0       1.0            1.0                0.0   
3               1.0         1.0       1.0            0.0                1.0   
4               1.0         1.0       1.0            0.0                1.0   

   Severity_None  Severity_Severe  
0            0

### Naive Multiplier
Severity_Mild ->  normal distribution around 2 with SD = 1 <br>
Severity_Moderate -> normal dist around 5 with SD = 2<br>
Severity_Severe -> normal dist around 8 with SD = 3<br>
Severity_None -> normal dist around 5 with SD = 2<br>



#### Put data onto a normal distribution

In [63]:
# multiply all values by scalar, with sd, around normal dist
def multiplier(row, scalar, sd = None, normal_dist = True):
    if normal_dist:
        for symptom in symptoms:
            row[symptom] = np.abs(row[symptom]*np.random.normal(scalar, sd)) #TODO: cap
    else:
        for symptom in symptoms:
            row[symptom] = row[symptom]*scalar   
    return row

In [64]:
covid_df = covid_df.reset_index()  # make sure indexes pair with number of rows
for index, row in covid_df.iterrows():
    if row['Severity_Mild'] == 1:
        covid_df.iloc[index] = multiplier(row, 2, 1)
        continue
    elif row['Severity_Moderate'] == 1:
        covid_df.iloc[index] = multiplier(row, 5, 2)
        continue
    elif row['Severity_Severe'] == 1:
        covid_df.iloc[index] = multiplier(row, 8, 3)
        continue
    elif row['Severity_None'] == 1:
        covid_df.iloc[index] = multiplier(row, 5, 2)
        continue
    else:
        raise ValueError("One of the severity values must be 1")

In [65]:
print(covid_df.head(20))


    index      Fever  Tiredness  Dry-Cough  Difficulty-in-Breathing  \
0       0   2.846241   3.018200   1.441047                 0.088845   
1       1   3.182810   3.081807   2.618098                 1.222165   
2       2   0.249831   1.722238   0.476629                 1.319111   
3       3   5.651155   1.330669   8.086235                 7.059070   
4       4   8.505459   0.749368   3.348887                 4.809854   
5       5   0.878850   3.009650   2.706943                 7.412909   
6       6   5.724332  10.446826  12.540621                 5.224780   
7       7  10.896401   2.833195   4.592786                 8.473242   
8       8   7.943304  12.098439  10.408321                 5.837741   
9       9   4.673250   4.209751   4.745019                 5.243909   
10     10   5.405487   5.546573   5.141729                 7.488492   
11     11   6.108640   3.682513   9.308877                 4.543714   
12     12   2.130696   2.428616   0.986358                 1.101453   
13    

## 2. Graph Building
9 nodes

In [66]:
edges = []
for i in range(len(symptoms)):
        for j in range(i +1, len(symptoms)):
            edges.append((i,j))
triangles = []
for i in range(len(symptoms)):
        for j in range(i +1, len(symptoms)):
            for h in range(j + 1, len(symptoms)):
                triangles.append((i,j, h))
adj_matrix = np.zeros((len(symptoms), len(symptoms)))
curl = np.zeros((len(edges), len(triangles)))
neg_divergence = np.zeros((len(edges), len(symptoms)))
f = np.zeros((len(edges)))
W = np.zeros((len(edges), len(edges)))

In [67]:
print(triangles)
print(edges)

[(0, 1, 2), (0, 1, 3), (0, 1, 4), (0, 1, 5), (0, 1, 6), (0, 1, 7), (0, 1, 8), (0, 2, 3), (0, 2, 4), (0, 2, 5), (0, 2, 6), (0, 2, 7), (0, 2, 8), (0, 3, 4), (0, 3, 5), (0, 3, 6), (0, 3, 7), (0, 3, 8), (0, 4, 5), (0, 4, 6), (0, 4, 7), (0, 4, 8), (0, 5, 6), (0, 5, 7), (0, 5, 8), (0, 6, 7), (0, 6, 8), (0, 7, 8), (1, 2, 3), (1, 2, 4), (1, 2, 5), (1, 2, 6), (1, 2, 7), (1, 2, 8), (1, 3, 4), (1, 3, 5), (1, 3, 6), (1, 3, 7), (1, 3, 8), (1, 4, 5), (1, 4, 6), (1, 4, 7), (1, 4, 8), (1, 5, 6), (1, 5, 7), (1, 5, 8), (1, 6, 7), (1, 6, 8), (1, 7, 8), (2, 3, 4), (2, 3, 5), (2, 3, 6), (2, 3, 7), (2, 3, 8), (2, 4, 5), (2, 4, 6), (2, 4, 7), (2, 4, 8), (2, 5, 6), (2, 5, 7), (2, 5, 8), (2, 6, 7), (2, 6, 8), (2, 7, 8), (3, 4, 5), (3, 4, 6), (3, 4, 7), (3, 4, 8), (3, 5, 6), (3, 5, 7), (3, 5, 8), (3, 6, 7), (3, 6, 8), (3, 7, 8), (4, 5, 6), (4, 5, 7), (4, 5, 8), (4, 6, 7), (4, 6, 8), (4, 7, 8), (5, 6, 7), (5, 6, 8), (5, 7, 8), (6, 7, 8)]
[(0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6), (0, 7), (0, 8), (1, 2), (1

In [68]:
# f, w
for index, row in covid_df.iterrows():
    for i, edge in enumerate(edges):
        if (row[symptoms[edge[0]]] != 0) and (row[symptoms[edge[1]]] != 0):
            W[i, i] += (row[symptoms[edge[0]]] - row[symptoms[edge[1]]])
            f[i] += 1
for i in range(len(edges)):
    W[i, i] = W[i, i]*1/f[i]

In [69]:
print(f)
print(W)

[ 79200.  59400.  39600.  19800.  36000.  54000.  54000.  36000. 118800.
  79200.  39600.  57600.  86400.  86400.  57600. 118800.  59400.  64800.
  97200.  97200.  64800.  79200.  57600.  86400.  86400.  57600.  36000.
  54000.  54000.  36000.  86400.  57600.  28800. 115200.  57600.  86400.]
[[-0.01491773  0.          0.         ...  0.          0.
   0.        ]
 [ 0.         -0.00601042  0.         ...  0.          0.
   0.        ]
 [ 0.          0.         -0.02647151 ...  0.          0.
   0.        ]
 ...
 [ 0.          0.          0.         ...  0.00768889  0.
   0.        ]
 [ 0.          0.          0.         ...  0.         -0.0003365
   0.        ]
 [ 0.          0.          0.         ...  0.          0.
  -0.01398349]]


In [70]:
# adj_matrix
for index, row in covid_df.iterrows():
    for i in range(len(symptoms)):
        for j in range(i +1, len(symptoms)):
            adj_matrix[i, j] += (row[symptoms[i]] - row[symptoms[j]])/len(covid_df) #divide by #ppl with both symptoms
print(adj_matrix)

[[ 0.         -0.94383165 -1.25711685 -0.94221686 -0.00182082 -0.25983909
  -1.17513755 -1.16873872 -0.26155825]
 [ 0.          0.         -0.3132852   0.00161479  0.94201082  0.68399256
  -0.23130591 -0.22490708  0.68227339]
 [ 0.          0.          0.          0.31489999  1.25529602  0.99727776
   0.0819793   0.08837813  0.9955586 ]
 [ 0.          0.          0.          0.          0.94039603  0.68237777
  -0.23292069 -0.22652186  0.68065861]
 [ 0.          0.          0.          0.          0.         -0.25801826
  -1.17331673 -1.1669179  -0.25973743]
 [ 0.          0.          0.          0.          0.          0.
  -0.91529846 -0.90889963 -0.00171916]
 [ 0.          0.          0.          0.          0.          0.
   0.          0.00639883  0.9135793 ]
 [ 0.          0.          0.          0.          0.          0.
   0.          0.          0.90718047]
 [ 0.          0.          0.          0.          0.          0.
   0.          0.          0.        ]]


In [71]:
# neg_divergence
for i in range(len(edges)):
    for j in range(len(symptoms)):
        if edges[i][0] == j:
            neg_divergence[i,j] = -1
        elif edges[i][1] == j:
            neg_divergence[i,j] = 1
print(neg_divergence)

[[-1.  1.  0.  0.  0.  0.  0.  0.  0.]
 [-1.  0.  1.  0.  0.  0.  0.  0.  0.]
 [-1.  0.  0.  1.  0.  0.  0.  0.  0.]
 [-1.  0.  0.  0.  1.  0.  0.  0.  0.]
 [-1.  0.  0.  0.  0.  1.  0.  0.  0.]
 [-1.  0.  0.  0.  0.  0.  1.  0.  0.]
 [-1.  0.  0.  0.  0.  0.  0.  1.  0.]
 [-1.  0.  0.  0.  0.  0.  0.  0.  1.]
 [ 0. -1.  1.  0.  0.  0.  0.  0.  0.]
 [ 0. -1.  0.  1.  0.  0.  0.  0.  0.]
 [ 0. -1.  0.  0.  1.  0.  0.  0.  0.]
 [ 0. -1.  0.  0.  0.  1.  0.  0.  0.]
 [ 0. -1.  0.  0.  0.  0.  1.  0.  0.]
 [ 0. -1.  0.  0.  0.  0.  0.  1.  0.]
 [ 0. -1.  0.  0.  0.  0.  0.  0.  1.]
 [ 0.  0. -1.  1.  0.  0.  0.  0.  0.]
 [ 0.  0. -1.  0.  1.  0.  0.  0.  0.]
 [ 0.  0. -1.  0.  0.  1.  0.  0.  0.]
 [ 0.  0. -1.  0.  0.  0.  1.  0.  0.]
 [ 0.  0. -1.  0.  0.  0.  0.  1.  0.]
 [ 0.  0. -1.  0.  0.  0.  0.  0.  1.]
 [ 0.  0.  0. -1.  1.  0.  0.  0.  0.]
 [ 0.  0.  0. -1.  0.  1.  0.  0.  0.]
 [ 0.  0.  0. -1.  0.  0.  1.  0.  0.]
 [ 0.  0.  0. -1.  0.  0.  0.  1.  0.]
 [ 0.  0.  0. -1.  0.  0.

In [72]:
# curl
for j, tri in enumerate(triangles):
    for i, edge in enumerate(edges):
        if edge[0] in tri and edge[1] in tri:
            first_edge = tri.index(edge[0])
            if (first_edge + 1) % 3 == tri.index(edge[1]):
                curl[i, j] = 1
            else:
                curl[i,j] = -1
print(curl)

[[ 1.  1.  1. ...  0.  0.  0.]
 [-1.  0.  0. ...  0.  0.  0.]
 [ 0. -1.  0. ...  0.  0.  0.]
 ...
 [ 0.  0.  0. ...  0.  0.  1.]
 [ 0.  0.  0. ...  1.  0. -1.]
 [ 0.  0.  0. ...  0.  1.  1.]]


### Solving for r

In [73]:
right_side = np.matmul(np.transpose(neg_divergence), np.matmul(W, f))
left_side = np.matmul(np.matmul(np.transpose(neg_divergence), W), neg_divergence)
r = np.matmul(np.linalg.inv(left_side), right_side)
print(r)

[ -65536. -229376. -540672.  131072.  737280. -294912.       0.   98304.
   65536.]


### Solving for c

In [74]:
right_side = np.matmul(np.transpose(curl), np.matmul(W, f))
left_side = np.matmul(np.matmul(np.transpose(curl), W), curl)
c = np.matmul(np.linalg.inv(left_side), right_side)
print(c)

[ 15728640. -13107200.  -7733248.   6733824.   2359296.   -983040.
  -1966080.  26214400.   6815744.  -8847360. -10485760.   3145728.
   1572864.  -1572864.   6160384.  -7602176.  12582912.   2228224.
   8912896.  -3014656. -12058624.   -393216.  14155776.   4587520.
  -5242880.   7667712. -11796480.  12582912.  -4194304.  -4194304.
  -4718592.   8486912. -18087936.  11272192.   3276800.  14155776.
  -4980736.   1015808.  -7864320.  -5242880.   7340032.   2064384.
   3145728.    786432.  -2883584.  -1081344.   1048576.   5931008.
 -14680064.   8912896. -12910592.   2916352. -12058624.    -65536.
   6553600.  -7864320.   7208960.   3276800.   7864320.    -65536.
  -1064960.  -5636096.   9961472.   5767168.  -1605632.   6160384.
 -10485760.  -2162688.  -9437184.    278528.  -1572864.   8912896.
   -983040.  -2031616. -24117248.   6553600.   8388608. -17301504.
  -4980736.   -499712.   5636096.   3145728.  -4063232.   1966080.]
