In [1]:
from IPython import display 
import os
from datetime import datetime
import sys
import pickle
from tqdm import tqdm
import itertools
import collections
from collections import Counter, defaultdict

import numpy as np
import pandas as pd

import plotly
import plotly.express as px
import plotly.graph_objects as go

# Custom import
sys.path.append("../viz/")
from generate_distributions_for_data import generate_distribution, calculate_hellinger_distance_between_data_distribution

import warnings
warnings.filterwarnings('ignore')

In [2]:
alpha_items_df = pd.read_csv("../data/non-anonymous/alphabay/items.csv", error_bad_lines=False, 
                    lineterminator='\n', usecols=['marketplace', 'title', 'vendor', 'last_observed', 'prediction', 'ships_to', 'ships_from', 'description']).drop_duplicates()
alpha_feedback_df = pd.read_csv("../data/non-anonymous/alphabay/feedbacks.csv", error_bad_lines=False, 
                    lineterminator='\n', usecols=['reciever', 'order_title', 'order_amount_usd'])
alpha_feedback_df.columns = ['vendor', 'title', 'order_amount_usd']
alpha_df = alpha_items_df.merge(alpha_feedback_df, how = 'inner', on = ['title', 'vendor'])

dreams_items_df = pd.read_csv("../data/non-anonymous/dream/items.csv", error_bad_lines=False, 
                    lineterminator='\n', usecols=['marketplace', 'title', 'vendor', 'last_observed', 'prediction', 'ships_to', 'ships_from', 'description']).drop_duplicates()
dreams_feedback_df = pd.read_csv("../data/non-anonymous/dream/feedbacks.csv", error_bad_lines=False, 
                    lineterminator='\n', usecols=['reciever', 'order_title', 'order_amount_usd'])
dreams_feedback_df.columns = ['vendor', 'title', 'order_amount_usd']
dreams_df = dreams_items_df.merge(dreams_feedback_df, how = 'inner', on = ['title', 'vendor'])

silk_df = pd.read_csv("../data/non-anonymous/silk-road/items.csv", error_bad_lines=False, 
                    lineterminator='\n', usecols=['marketplace', 'date', 'title', 'seller_id', 'category', 'ship_to', 'ship_from', 'listing_description', 'price_btc']).drop_duplicates()
silk_df.columns = ['marketplace' , 'last_observed', 'title', 'prediction', 'order_amount_usd', 'ships_to', 'ships_from', 'vendor', 'description']
silk_df['order_amount_usd'] = silk_df['order_amount_usd'].apply(lambda x: x*54.46)

In [3]:
alpha_df['last_observed'] = alpha_df['last_observed'].apply(lambda x : str(x).split("-")[0])
dreams_df['last_observed'] = dreams_df['last_observed'].apply(lambda x : str(x).split("-")[0])
silk_df['last_observed'] = silk_df['last_observed'].apply(lambda x : datetime.fromtimestamp(x).year)

# Alphabay dataset

In [4]:
alpha_2015 = alpha_df[alpha_df['last_observed']=='2015']
alpha_2016 = alpha_df[alpha_df['last_observed']=='2016']
alpha_2017 = alpha_df[alpha_df['last_observed']=='2017']

In [7]:
alpha_2015_vendor = alpha_2015.vendor.unique()
alpha_2016_vendor = alpha_2016.vendor.unique()
alpha_2017_vendor = alpha_2017.vendor.unique()
shared_alpha_vendors = set(alpha_2015_vendor) & set(alpha_2016_vendor) & set(alpha_2017_vendor)
print("Total no of shared vendors : ", len(shared_alpha_vendors))

Total no of shared vendors :  620


In [6]:
alpha_2015_df_shared = alpha_2015[alpha_2015['vendor'].isin(shared_alpha_vendors)]
alpha_2016_df_shared = alpha_2016[alpha_2016['vendor'].isin(shared_alpha_vendors)]
alpha_2017_df_shared = alpha_2017[alpha_2017['vendor'].isin(shared_alpha_vendors)] 

In [43]:
print("Years :", alpha_df['last_observed'].unique())
alpha_2017.shape[0] + alpha_2015.shape[0]

Years : ['2017' '2015' '2016']


1263063

# Dreams dataset

In [19]:
dream_df = dreams_df[dreams_df['marketplace']=='Dream']

In [20]:
dream_2017 = dream_df[dream_df['last_observed']=='2017']
dream_2018 = dream_df[dream_df['last_observed']=='2018']
dream_1970 = dream_df[dream_df['last_observed']=='1970']

dream_2017_vendor = dream_2017.vendor.unique()
dream_2018_vendor = dream_2018.vendor.unique()
dream_1970_vendor = dream_1970.vendor.unique()
shared_dream_vendors = set(dream_2017_vendor) & set(dream_2018_vendor)
print("Total no of shared vendors : ", len(shared_dream_vendors))

dream_2017_df_shared = dream_2017[dream_2017['vendor'].isin(shared_dream_vendors)]
dream_2018_df_shared = dream_2018[dream_2018['vendor'].isin(shared_dream_vendors)]
dream_1970_df_shared = dream_1970[dream_1970['vendor'].isin(shared_dream_vendors)] 

print("Years :", dream_df['last_observed'].unique())
dream_2017.shape[0]+ dream_2018.shape[0]

Total no of shared vendors :  877
Years : ['2017' '2018' '1970']


1509525

# Valhalla dataset

In [10]:
valhalla_df = dreams_df[dreams_df['marketplace']=='Valhalla']

In [11]:
print("Years :", valhalla_df['last_observed'].unique())
valhalla_df.shape

Years : ['2017']


(20423, 9)

# Traderoute dataset

In [12]:
traderoute_df = dreams_df[dreams_df['marketplace']=='Traderoute']

In [46]:
traderoute_2016 = traderoute_df[traderoute_df['last_observed']=='2016']
traderoute_2017 = traderoute_df[traderoute_df['last_observed']=='2017']

traderoute_2016_vendor = traderoute_2016.vendor.unique()
traderoute_2017_vendor = traderoute_2017.vendor.unique()
shared_traderoute_vendors = set(traderoute_2016_vendor) & set(traderoute_2017_vendor)
print("Total no of shared vendors : ", len(shared_traderoute_vendors))

traderoute_2016_df_shared = traderoute_2016[traderoute_2016['vendor'].isin(shared_traderoute_vendors)]
traderoute_2017_df_shared = traderoute_2017[traderoute_2017['vendor'].isin(shared_traderoute_vendors)] 

print("Years :", traderoute_df['last_observed'].unique())
traderoute_2016.shape[0] + traderoute_2017.shape[0]

Total no of shared vendors :  5
Years : ['2017' '2016']


265250

# Berlusconi dataset

In [14]:
berlusconi_df = dreams_df[dreams_df['marketplace']=='Berlusconi']

In [47]:
berlusconi_2017 = berlusconi_df[berlusconi_df['last_observed']=='2017']
berlusconi_2018 = berlusconi_df[berlusconi_df['last_observed']=='2018']

berlusconi_2017_vendor = berlusconi_2017.vendor.unique()
berlusconi_2018_vendor = berlusconi_2018.vendor.unique()
shared_berlusconi_vendors = set(berlusconi_2017_vendor) & set(berlusconi_2018_vendor)
print("Total no of shared vendors : ", len(shared_berlusconi_vendors))

berlusconi_2017_df_shared = berlusconi_2017[berlusconi_2017['vendor'].isin(shared_berlusconi_vendors)]
berlusconi_2018_df_shared = berlusconi_2018[berlusconi_2018['vendor'].isin(shared_berlusconi_vendors)]

print("Years :", berlusconi_df['last_observed'].unique())
berlusconi_2017.shape[0] + berlusconi_2018.shape[0]

Total no of shared vendors :  68
Years : ['2018' '2017']


21656

# Silk-Road Dataset

In [49]:
silk_2012 = silk_df[silk_df['last_observed']==2012]
silk_2013 = silk_df[silk_df['last_observed']==2013]

silk_2012_vendor = silk_2012.vendor.unique()
silk_2013_vendor = silk_2013.vendor.unique()
shared_silk_vendors = set(silk_2012_vendor) & set(silk_2013_vendor)
print("Total no of shared vendors : ", len(shared_silk_vendors))

silk_2012_df_shared = silk_2012[silk_2012['vendor'].isin(shared_silk_vendors)]
silk_2013_df_shared = silk_2013[silk_2013['vendor'].isin(shared_silk_vendors)]

print("Years :", silk_df['last_observed'].unique())
silk_2012.shape[0] + silk_2013.shape[0]

Total no of shared vendors :  421
Years : [2012 2013]


1065801

# Generating the stats

In [59]:
layer1_market_display_list = ['Alphabay', '', '', 'Dreams', '', '', 'Valhalla', 'Traderoute', '', 'Berlusconi', '', 'Silk-Road 1', '']
layer1_market_count_list = [alpha_df.shape[0], '', '', dream_df.shape[0], '', '', valhalla_df.shape[0], traderoute_df.shape[0], '', berlusconi_df.shape[0], '', silk_df.shape[0], '']

layer2_year_display_list = ['2015', '2016', '2017', '1970', '2017', '2018', '2017', '2016', '2017', '2017', '2018', '2012','2013']
layer2_year_count_list = [alpha_2015.shape[0], alpha_2016.shape[0], alpha_2017.shape[0], dream_1970.shape[0], dream_2017.shape[0], dream_2018.shape[0], valhalla_df.shape[0], traderoute_2016.shape[0], traderoute_2017.shape[0], berlusconi_2017.shape[0], berlusconi_2018.shape[0], silk_2012.shape[0], silk_2013.shape[0]]

layer_temp = ['2015-16', '2016-17', '']
layer_temp = [1221]

layer3_sybil_count_list = [620, '', '', '', 877, '', 0, 5, '', 68, '', 421, '']
layer3_year_count_list = [alpha_2015_df_shared.shape[0], alpha_2016_df_shared.shape[0], alpha_2017_df_shared.shape[0], dream_1970_df_shared.shape[0], dream_2017_df_shared.shape[0], dream_2018_df_shared.shape[0], valhalla_df.shape[0], traderoute_2016_df_shared.shape[0], traderoute_2017_df_shared.shape[0], berlusconi_2017_df_shared.shape[0], berlusconi_2018_df_shared.shape[0], silk_2012_df_shared.shape[0], silk_2013_df_shared.shape[0]]

In [60]:
df = pd.DataFrame({'MARKET':layer1_market_display_list, 'TOTAL ADS':layer1_market_count_list, 'YEARS':layer2_year_display_list, 'ADS/YEAR':layer2_year_count_list, 'SYBILS':layer3_sybil_count_list, 'SHARED ADS/YEAR':layer3_year_count_list})

In [35]:
df['TOTAL ADS'] = df['TOTAL ADS'].fillna(0)
df['SYBILS'] = df['SYBILS'].fillna(0)

In [40]:
fig = px.sunburst(df, path=['MARKET', 'TOTAL ADS', 'YEARS', 'ADS/YEAR', 'SYBILS', 'SHARED ADS/YEAR'])
fig.show()

In [62]:
df.set_index('MARKET')

Unnamed: 0_level_0,TOTAL ADS,YEARS,ADS/YEAR,SYBILS,SHARED ADS/YEAR
MARKET,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Alphabay,1771258.0,2015,85826,620.0,31525
,,2016,508195,,206974
,,2017,1177237,,507488
Dreams,1509526.0,1970,1,,1
,,2017,556454,877.0,190285
,,2018,953071,,614471
Valhalla,20423.0,2017,20423,0.0,20423
Traderoute,265250.0,2016,13,5.0,13
,,2017,265237,,2916
Berlusconi,21656.0,2017,2425,68.0,2051


In [21]:
alpha_vendors = alpha_df.vendor.unique()
dream_vendors = dream_df.vendor.unique()
silk_vendors = silk_df.vendor.unique()

In [37]:
a = alpha_df[alpha_df['vendor']=='Angelina']
a[a['prediction']=='Prescription'][['title','description']].drop_duplicates().head()

Unnamed: 0,title,description
133430,Generic Levitra - Vardenafil Citrate - 20mg - ...,Generic Levitra - Vardenafil Citrate - 20mg\r ...
184081,Seroquel (Quetiapine) 200mg - 100x,Seroquel (Quetiapine) 200mg\r Temporary availa...
361327,Zofran (Ondansetron) 4mg + Clonidine 0.1mg Wit...,Zofran (Ondansetron) 4mg\rClonidine 0.3mg\r DE...
368671,SOMA (Carisoprodol) 350mg - ProSoma Brand - 100x,ProSoma SOMA (Carisoprolol) 350mg\r DESCRIPTI...
410503,Seroquel (Quetiapine) 200mg - 200x,Seroquel (Quetiapine) 200mg\r Temporary availa...


In [38]:
alpha_2016[alpha_2015['vendor']=='03welle'][['title','description']].drop_duplicates()

Unnamed: 0,title,description
257193,Gabapentin (Neurontin) - 5 x 300mg,This is a mainstay drug for migraines and Gaba...
257662,Gabapentin (Neurontin) - 10 x 300mg,This is a mainstay drug for migraines and Gaba...
258138,Propranolol (Inderal) - Beta-Blocker - 1 x 80mg,Propranolol is a sympatholytic non-selective b...
258440,Propranolol (Inderal) - Beta-Blocker - 10 x 80mg,Propranolol is a sympatholytic non-selective b...
260044,Propranolol (Inderal) - Beta-Blocker - 5 x 80mg,Propranolol is a sympatholytic non-selective b...


In [39]:
c = dream_df[dream_df['vendor']=='Angelina']
c[c['prediction']=='Prescription'][['title','description']].drop_duplicates().head()

Unnamed: 0,title,description
52171,Zofran and Clonidine Withdrawl Kit - 20x,DESCRIPTION\r\n--------------------\r\nZofran ...
207476,Lunesta 3mg - 100x,Generic Lunesta (Eszopiclone) 3mg\r\n\r\nDESCR...
589280,SOMA (Carisoprolol) 350mg - 50x,Generic SOMA (Carisoprolol) 350mg\r\n\r\nDESCR...
652309,"Orlistat (Xenical, weight loss) 120mg X 200","Generic Orlistat (Xenical, weight loss) 120mg\..."
663648,SOMA (Carisoprolol) 350mg - 200x,Generic SOMA (Carisoprolol) 350mg\r\n\r\nDESCR...
