# SSL Interception

## Set Up

In [2]:
import pyspark
from pyspark.sql import SparkSession
import pandas as pd
import os
from tldextract import extract
from pandas.core.frame import DataFrame

In [10]:
SQLContext = SparkSession.builder.master("local[1]") \
                .appName("session-0") \
                .getOrCreate()
SQLContext.conf.set("spark.sql.execution.arrow.pyspark.enabled", "true")

In [14]:
#Path to parquet file directory
data = "/home/ubuntu/parquet_data/2021-12-28.parquet"
df = SQLContext.read.parquet(data)
df = spark_df.filter(spark_df["anon_orig"] == "uva")

In [15]:
#df = spark_df.toPandas()
#df

## Create Whitelist

* It's unlikely that SSL Interception proxies will use a public trust CA
* To get a whitelist of public trust CAs, we filter out validation status == OK
* Then we save the issuer organization

In [16]:
df.columns

['_lpp_ver',
 'anon_orig',
 'anon_resp',
 'cert_chain_fuids',
 'cipher',
 'client_cert_chain_fuids',
 'client_issuer',
 'client_subject',
 'curve',
 'established',
 'id_orig_h',
 'id_orig_p',
 'id_resp_h',
 'id_resp_p',
 'issuer',
 'ja3',
 'ja3s',
 'last_alert',
 'next_protocol',
 'resumed',
 'server_name',
 'subject',
 'ts',
 'uid',
 'validation_status',
 'version']

In [18]:
#print(df.iloc[4]["issuer"])
#print(df.iloc[254]["server_name"])

In [19]:
def get_issuer_org(issuer_str: str):
    if type(issuer_str) == str:
        o_index = issuer_str.find("O=")
        comma_index = issuer_str.find(",", o_index)
        #Get characters starting from after O= and before ,
        org_str = issuer_str[o_index + 2 :comma_index]
        return org_str.replace("\\", "")

def get_domain(server_name_str: str):
    if type(server_name_str) == str:
        domain = extract(server_name_str).domain
        #tld - top level domain
        tld = extract(server_name_str).suffix
        return f"{domain}.{tld}"

In [20]:
df["issuer_O"] = df["issuer"].apply(lambda issuer_str:get_issuer_org(issuer_str))
df["domain"] = df["server_name"].apply(lambda server_name_str:get_domain(server_name_str))
df.head(10)

TypeError: 'Column' object is not callable

In [9]:
df[df["issuer_O"] == "Cloudflare"]

Unnamed: 0,_lpp_ver,anon_orig,anon_resp,cert_chain_fuids,cipher,client_cert_chain_fuids,client_issuer,client_subject,curve,established,...,next_protocol,resumed,server_name,subject,ts,uid,validation_status,version,issuer_O,domain
970,1.11-1,uva,none,"[FKD1Iw1nzWQNC17IJ7, FnSrBA1Q7EEtkxsrZj]",TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,[],,,x25519,True,...,h2,False,metadata.provider.plex.tv,"CN=plex.tv,O=Cloudflare\, Inc.,L=San Francisco...",2021-12-28T14:31:30.760917Z,CWaZvS2Ms7ytHr6BP1,ok,TLSv12,Cloudflare,plex.tv
1467,1.11-1,uva,none,"[FxuovG1Fdg1kSq9DHe, FU8mCz1fbRU9fwX0zg]",TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,[],,,x25519,True,...,h2,False,otf.zendesk.com,"CN=otf.zendesk.com,O=Cloudflare\, Inc.,L=San F...",2021-12-28T14:32:16.174148Z,Cxv9g34x5CWIw3BDji,ok,TLSv12,Cloudflare,zendesk.com
1809,1.11-1,uva,none,"[F73xVj1sU8xjHZDTwh, FvE79T1ANZCPN5WxM9]",TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256,[],,,,True,...,,False,www.cochranelibrary.com,"CN=sni.cloudflaressl.com,O=Cloudflare\, Inc.,L...",2021-12-28T14:32:46.347308Z,C4oGtM4je9z20iy47,ok,TLSv12,Cloudflare,cochranelibrary.com
2330,1.11-1,uva,none,"[FHRB2d3PYSOHrXplAd, FkvsU92x7efmBKlEYl]",TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,[],,,x25519,True,...,h2,False,router.infolinks.com,"CN=sni.cloudflaressl.com,O=Cloudflare\, Inc.,L...",2021-12-28T14:33:32.219378Z,C6KsalzVPtJqdUgfe,ok,TLSv12,Cloudflare,infolinks.com
2843,1.11-1,uva,none,"[FBIoPt42BmklQEV6Mi, FNlLwJ3fcDfChyrO48]",TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,[],,,x25519,True,...,h2,False,browser-update.org,"CN=sni.cloudflaressl.com,O=Cloudflare\, Inc.,L...",2021-12-28T14:34:20.652952Z,CKW1Qm6A6raJmli4b,ok,TLSv12,Cloudflare,browser-update.org
5076,1.11-1,uva,none,"[FwaPh13DZSS4vxria8, Fq9vQLYJMJzLBmD9c]",TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,[],,,x25519,True,...,h2,False,static.getclicky.com,"CN=sni.cloudflaressl.com,O=Cloudflare\, Inc.,L...",2021-12-28T14:37:33.053533Z,C6jZGO3slXobNkuJb,ok,TLSv12,Cloudflare,getclicky.com
5931,1.11-1,uva,none,"[Fek4tE2LzIeXJQucv5, FIT3mS2Asmacy6tU7c]",TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,[],,,x25519,True,...,h2,False,kessel-api.parsec.app,"CN=parsec.app,O=Cloudflare\, Inc.,L=San Franci...",2021-12-28T14:38:49.365574Z,Cc3JYa1FOV8qwln1V8,ok,TLSv12,Cloudflare,parsec.app
6164,1.11-1,uva,none,"[FSS4qz1ht68ScsZN57, FCXRHu3mOVUHqQwwvc]",TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,[],,,x25519,True,...,h2,False,p.adsymptotic.com,"CN=sni.cloudflaressl.com,O=Cloudflare\, Inc.,L...",2021-12-28T14:39:09.665795Z,CJkPga2QInSYya4v7a,ok,TLSv12,Cloudflare,adsymptotic.com
7099,1.11-1,uva,none,"[F01VAI1PR5HAKRew4a, FBk3vs17qDS5cFHyQf]",TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,[],,,x25519,True,...,h2,False,amna01mstra8s42prod.dxcloud.episerver.net,"CN=sni.cloudflaressl.com,O=Cloudflare\, Inc.,L...",2021-12-28T14:40:35.260813Z,C2zzCm4Etnre7E2uW8,ok,TLSv12,Cloudflare,episerver.net
7853,1.11-1,uva,none,"[FxyUhD77J5ruw4aU6, FDI63P2wM51KsG4A5i]",TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,[],,,x25519,True,...,h2,False,img.webmd.com,"CN=img.webmd.com,O=Cloudflare\, Inc.,L=San Fra...",2021-12-28T14:41:48.763554Z,Cgdudt1JfHj5dmiUwk,ok,TLSv12,Cloudflare,webmd.com


In [10]:
print(df.iloc[5]["issuer"])

None


In [11]:
df_ok = df[df["validation_status"] == "ok"]
df_orgs = df_ok.groupby(["issuer_O"])["server_name"].agg("count").reset_index(name="count").sort_values(by="count", ascending=False)
df_orgs.head(10)

Unnamed: 0,issuer_O,count
9,DigiCert Inc,2613
22,Microsoft Corporation,1574
2,Amazon,1392
27,Starfield Technologies,564
26,Sectigo Limited,380
14,GoDaddy.com,363
3,Apple Inc.,336
16,Google Trust Services LLC,180
13,GlobalSign nv-sa,164
11,Entrust,140


In [12]:
def gen_whitelist(whitelist_path: str, df_orgs: pd.core.frame.DataFrame):
    whitelist_file = open(whitelist_path, "r+")
    whitelist_orgs = set(whitelist_file.read().splitlines())
    for org in df_orgs["issuer_O"]:
        if org not in whitelist_orgs:
            whitelist_orgs.add(org)
            whitelist_file.write(org + "\n")
    return whitelist_orgs

In [13]:
whitelist = gen_whitelist("whitelist.txt", df_orgs)

## Parse for Unable to Get Local Certificate

In [14]:
def get_validation_status_df(df: DataFrame, validation_status: str):
    return df[df["validation_status"] == validation_status]

In [15]:
df_unable = get_validation_status_df(df, "unable to get local issuer certificate")
df_unable

Unnamed: 0,_lpp_ver,anon_orig,anon_resp,cert_chain_fuids,cipher,client_cert_chain_fuids,client_issuer,client_subject,curve,established,...,next_protocol,resumed,server_name,subject,ts,uid,validation_status,version,issuer_O,domain
25,1.11-1,uva,none,"[Ff9I0d2ou2pEX2OS66, Fk1k8NmDbTbx6t9Od]",TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,[],,,secp384r1,True,...,,False,v10.events.data.microsoft.com,"CN=*.events.data.microsoft.com,OU=WSE,O=Micros...",2021-12-28T14:30:00.744078Z,CkABp430NRXBhRd1O5,unable to get local issuer certificate,TLSv12,Microsoft Corporation,microsoft.com
26,1.11-1,uva,none,"[FxGxlv1eMW6vsQ0737, F2mekw14U0T6bYWtQi]",TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,[],,,secp384r1,True,...,h2,False,array802.prod.do.dsp.mp.microsoft.com,"CN=*.prod.do.dsp.mp.microsoft.com,OU=DSP,O=Mic...",2021-12-28T14:30:00.708941Z,C0jph610J1pMdHabL1,unable to get local issuer certificate,TLSv12,Microsoft Corporation,microsoft.com
27,1.11-1,uva,none,"[FPNDTsyjgaXMURfFh, FbaxTr3jwqJQ0r1FWg]",TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,[],,,secp384r1,True,...,,False,v10.events.data.microsoft.com,"CN=*.events.data.microsoft.com,OU=WSE,O=Micros...",2021-12-28T14:30:01.034601Z,Cas9McfM0Pe1s8cwc,unable to get local issuer certificate,TLSv12,Microsoft Corporation,microsoft.com
39,1.11-1,uva,none,"[Fxqs6p1GAreWnUzv8, FFOBT02fcAN6whxE66]",TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,[],,,x25519,True,...,h2,False,settings-win.data.microsoft.com,"CN=settings-win.data.microsoft.com,OU=WSE,O=Mi...",2021-12-28T14:30:02.370812Z,Cl6j0G4Vqaaaw1xxRl,unable to get local issuer certificate,TLSv12,Microsoft Corporation,microsoft.com
41,1.11-1,uva,none,"[FIAgL64Te7S2jBVpn1, FHD0QYUsX1ANpU4A2]",TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,[],,,secp384r1,True,...,,False,us-v20.events.data.microsoft.com,"CN=*.events.data.microsoft.com,OU=WSE,O=Micros...",2021-12-28T14:30:02.170327Z,CfB8j12llTlJ4tB93,unable to get local issuer certificate,TLSv12,Microsoft Corporation,microsoft.com
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20049,1.11-1,uva,none,"[FjpSPm3k0xozrHTvr5, FUATDW2TLRu8uTvBc]",TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,[],,,x25519,True,...,h2,False,settings-win.data.microsoft.com,"CN=settings-win.data.microsoft.com,OU=WSE,O=Mi...",2021-12-28T14:59:52.604423Z,CAuLKTQks6FsFsHD1,unable to get local issuer certificate,TLSv12,Microsoft Corporation,microsoft.com
20067,1.11-1,uva,none,"[FAJ7FC13G0i0Cr1Je8, F9NTGa3tSRWXGterTk]",TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,[],,,secp384r1,True,...,,False,v10.events.data.microsoft.com,"CN=*.events.data.microsoft.com,OU=WSE,O=Micros...",2021-12-28T14:59:54.470599Z,CFGPSo4WVFNgZstSLg,unable to get local issuer certificate,TLSv12,Microsoft Corporation,microsoft.com
20068,1.11-1,uva,none,"[FTaD2e1Bf4mJxO0oi3, FnEV1OvmmRoJUoUSk]",TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,[],,,x25519,True,...,h2,False,settings-win.data.microsoft.com,"CN=settings-win.data.microsoft.com,OU=WSE,O=Mi...",2021-12-28T14:59:55.173665Z,CDu5lL3UlbAnDrVsg5,unable to get local issuer certificate,TLSv12,Microsoft Corporation,microsoft.com
20071,1.11-1,uva,none,"[FnQATK1Sxn7HCgbx1d, FVN7wYxipGNTOjXc9]",TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,[],,,secp384r1,True,...,,False,v10.events.data.microsoft.com,"CN=*.events.data.microsoft.com,OU=WSE,O=Micros...",2021-12-28T14:59:54.843187Z,CfMk1L35ujtR276Ys6,unable to get local issuer certificate,TLSv12,Microsoft Corporation,microsoft.com


In [16]:
df_unable = df_unable[~(df_unable["issuer_O"].isin(whitelist))]
df_unable

Unnamed: 0,_lpp_ver,anon_orig,anon_resp,cert_chain_fuids,cipher,client_cert_chain_fuids,client_issuer,client_subject,curve,established,...,next_protocol,resumed,server_name,subject,ts,uid,validation_status,version,issuer_O,domain
927,1.11-1,uva,none,"[FnSGUk3C6p3QNMcnqk, Ft5ylG1S8KmAGa0n8h]",TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,[],,,secp256r1,True,...,,False,scribe.logs.roku.com,"CN=scribe.logs.roku.com,O=Roku\, Inc.,L=San Jo...",2021-12-28T14:31:26.533336Z,CXg0Mn1tnfheetmkzf,unable to get local issuer certificate,TLSv12,Roku,roku.com
2034,1.11-1,uva,none,"[FKuJgM3xjnWajqcf63, FJVfTu4SwVNqusYei]",TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA,[],,,secp256r1,True,...,,False,api-global.netflix.com,"CN=api.netflix.com,O=Netflix\, Inc.,L=Los Gato...",2021-12-28T14:33:05.139184Z,CAAcDcULmTO6yj26i,unable to get local issuer certificate,TLSv10,Netflix,netflix.com
2284,1.11-1,uva,none,"[FBF7Cb26vtA2WdTvUa, Fgebvk1YvZobcMTSSd]",TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,[F9Uc4N1ieWArXgcx0k],"emailAddress=wDCSb1nb@redhat.com,CN=EfA 4V6 5d...",CN=39JXLSpu-65DQ-1kjr-4BuJ-3NGEF6A0NwWg,secp256r1,True,...,,False,,"emailAddress=wDCSb1nb@redhat.com,CN=4B3LrxyIGB...",2021-12-28T14:33:28.241852Z,CW1y881PeKreXoGvxc,unable to get local issuer certificate,TLSv12,Red Hat,
2296,1.11-1,uva,none,"[FTHm8I2CKCphJL8yLe, F6CxPILpLM2280AP2]",TLS_DHE_RSA_WITH_AES_256_GCM_SHA384,[],,,,True,...,,False,epicsfd.connecticutchildrens.org,"CN=epicsfd.connecticutchildrens.org,OU=CAREQUA...",2021-12-28T14:33:30.095271Z,CMp0pc8W8pmVP4itf,unable to get local issuer certificate,TLSv12,EMR Direct,connecticutchildrens.org
3533,1.11-1,uva,none,"[FEu2Ta2vwevTQ8cl6k, FuMF9v4eSWs9KOKXXf]",TLS_RSA_WITH_AES_128_GCM_SHA256,"[FjJBYm2NyLcRuqA0ng, FjJbMX2b55Ix5hfnE2, FxupI...","CN=5HMMQusZR 24o 6CXE Tu5 1V,OU=RSA CBSD Mfr C...","CN=1Tm5Y1wqLq7:4xrGjCOm7n2W,OU=WInnForum CBSD ...",,True,...,http/1.1,False,stasas.sascms.net,"CN=sas.sascms.net,OU=CommScope SAS Certificate...",2021-12-28T14:35:20.932834Z,C0hHwO3JOTg3byCjHj,unable to get local issuer certificate,TLSv12,CommScope,sascms.net
4150,1.11-1,uva,none,[Fee6JsG2i6pGvX8Cl],TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,[],,,x25519,True,...,,False,,"O=Infoblox,ST=California,C=US",2021-12-28T14:36:16.561276Z,CAEtxn2WB83pPDG7ij,unable to get local issuer certificate,TLSv12,Infoblox,
4374,1.11-1,uva,none,"[FjZU9b18edGCVjMyAk, Fkr3la252R3rbaO5Ef]",TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,[],,,secp256r1,True,...,,False,api-global.netflix.com,"CN=api.netflix.com,O=Netflix\, Inc.,L=Los Gato...",2021-12-28T14:36:34.932830Z,CITBdq2KuJIGCEbPv4,unable to get local issuer certificate,TLSv12,Netflix,netflix.com
8244,1.11-1,uva,none,"[FlKLeHDPtgLRTrvFg, FMOQ0XUQRkOSwXEF6]",TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,[Fia64r31GfFcadU2rl],CN=4i25mTSax 1jUgVaec9X 3jjsr 1IlTtQ 2GVU1Y 1V...,"CN=7qKBwJJVYfZrYADQAymBWuGvqLIXKYxr,O=JumpCloud",secp256r1,True,...,,False,agent.jumpcloud.com,"CN=agent.jumpcloud.com,OU=DevOps,O=JumpCloud,S...",2021-12-28T14:42:24.264742Z,CHLTFp7kzT2tpjeo7,unable to get local issuer certificate,TLSv12,JumpCloud,jumpcloud.com
8965,1.11-1,uva,none,"[Fax6kqb6YTtJM2hT2, F16sDK1Fih1h73Q4d6]",TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,[FlDmSaEWbHgoWrKp2],CN=4i25mTSax 1jUgVaec9X 3jjsr 1IlTtQ 2GVU1Y 1V...,"CN=jyyGfFCivvjw0JXBerOxLmwBdVZIk4hi,O=JumpCloud",secp256r1,True,...,,False,agent.jumpcloud.com,"CN=agent.jumpcloud.com,OU=DevOps,O=JumpCloud,S...",2021-12-28T14:43:29.928148Z,CqWQcP2acOSc1ykrmg,unable to get local issuer certificate,TLSv12,JumpCloud,jumpcloud.com
9281,1.11-1,uva,none,[Feo4de4yOfPUwuoFql],TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,[],,,x25519,True,...,,False,,"O=Infoblox,ST=California,C=US",2021-12-28T14:43:56.624702Z,CxJIBonCb7AMwWyBg,unable to get local issuer certificate,TLSv12,Infoblox,
