In [1]:
import pandas as pd
import requests
import numpy as np
import json

In [2]:
# The two Pango lineages to compare -- edit these names as needed.
lineage = [
    'BA.2.75',
    'BQ.1',
]

# Minimum fraction of a lineage's GISAID samples that must carry a SNP for it
# to be considered (0.9 means 90%, the default).
thr = 0.9

In [3]:
"""
Code adapted from sc2rf to access the LAPIS JSON of SNPs for each lineage.
Keeps only the SNPs whose proportion is at or above the threshold.
"""
# One DataFrame of retained SNPs per entry of `lineage`, in the same order.
list_df = []

for i in lineage:
    url = f'https://lapis.cov-spectrum.org/open/v1/sample/nuc-mutations?pangoLineage={i}&downloadAsFile=false&dataFormat=json'
    print(f"Url is {url}")
    # A timeout keeps the cell from hanging forever on a stalled connection.
    r = requests.get(url, timeout=60)
    # Fail loudly on an HTTP error instead of parsing an error body as data.
    r.raise_for_status()
    result = r.json()
    df = pd.json_normalize(result['data'])
    if df.empty:
        # An empty payload yields a frame with no columns at all, which would
        # otherwise surface below as an opaque KeyError on 'proportion'.
        raise ValueError(f"No mutations returned for lineage {i!r}")
    # Keep SNPs at or above the threshold, then drop the bookkeeping columns.
    # Building a new frame (no inplace=True) keeps the cell safe to re-run.
    df = df[df['proportion'] >= thr].drop(columns=['proportion', 'count'])
    list_df.append(df)

Url is https://lapis.cov-spectrum.org/open/v1/sample/nuc-mutations?pangoLineage=BA.2.75&downloadAsFile=false&dataFormat=json
Url is https://lapis.cov-spectrum.org/open/v1/sample/nuc-mutations?pangoLineage=BQ.1&downloadAsFile=false&dataFormat=json


In [4]:
"""
Creates 2 dfs:
    - df1: SNPs specific to lineage 1 (absent from lineage 2)
    - df2: SNPs specific to lineage 2 (absent from lineage 1)
"""
# The outer merge with indicator=True adds a '_merge' column tagging each SNP as
# 'left_only' (first lineage), 'right_only' (second lineage) or 'both'.
df = list_df[0].merge(list_df[1], on='mutation', how='outer', indicator=True)
# Drop the SNPs present in both lineages, leaving only the lineage-specific ones.
new_df = df.drop(df[df['_merge'] == 'both'].index)

print('Specific SNPs of', lineage[0])
df1 = new_df[new_df['_merge'] == 'left_only']
# Guard on df1 itself (not new_df): if only the other lineage has specific
# SNPs, this avoids printing an empty Series here.
if len(df1) > 0:
    print(df1['mutation'], '\n\n')

print('Specific SNPs of', lineage[1])
df2 = new_df[new_df['_merge'] == 'right_only']
# Same guard, applied to the frame that is actually printed.
if len(df2) > 0:
    print(df2['mutation'])

Specific SNPs of BA.2.75
29     A27259C
51     G15451A
55     G22898A
56     G27382C
57     A27383T
58     T27384C
118     C9866T
119    C26858T
120    C25416T
121    G22331A
122     C4586T
123    A12444G
124    A22001G
125    G22577C
126    T22942G
127    A26275G
128    T22016C
129     C3796T
130    C22033A
131    A22190G
132     C3927T
133     C5183T
Name: mutation, dtype: object 


Specific SNPs of BQ.1
134    T14257C
135    T21765-
136    C28312T
137    A21766-
138    C21767-
139    A21768-
140    T21769-
141    G21770-
142    T22917G
143    G12160A
144    T23018G
145    G26529A
146    C27889T
147     T2954C
148    G16935A
149    C11750T
150    T22942A
151    G28681T
152     C1931A
153    A22893C
Name: mutation, dtype: object
