Skip to content

Commit

Permalink
Update script to tabulate single nodes.
Browse files Browse the repository at this point in the history
  • Loading branch information
minimaxir committed Dec 14, 2016
1 parent 0b123a5 commit c5c9071
Show file tree
Hide file tree
Showing 2 changed files with 189 additions and 17 deletions.
52 changes: 35 additions & 17 deletions hibp_service_edges.py
@@ -1,34 +1,52 @@
import csv
import itertools
import json
from collections import Counter

# Tabulate per-service totals and pairwise service co-occurrence weights from
# the HIBP consolidated data file, writing each table to its own CSV file.
#
# NOTE: this script uses Python 2 idioms (dict.iteritems(), csv files opened
# in binary 'wb' mode).

input_path = "/Users/maxwoolf/Downloads" \
    "/HIBP Consolidated and Anonymised Data" \
    "/HIBP Consolidated and Anonymised Data.txt"

# service_dict is a dictionary with a string key (service name) and the
# summed count as its value.
service_dict = {}

# edge_dict is a dictionary with tuple keys (a pair of service names) and the
# summed count as its value.
edge_dict = {}

# The input is read exactly once; both tables are filled in the same pass so
# that no count is ever added twice.
with open(input_path, 'rb') as f:
    for entry in f:
        # Each line is parsed as "<service>[;<service>...] <count>".
        fields = entry.split(' ')
        services = fields[0].split(';')
        count = int(fields[1].rstrip())

        services.sort()  # Ensure edges are in correct order

        for service in services:
            service_dict[service] = service_dict.get(service, 0) + count

        # A line naming a single service contributes no edges.
        if len(services) > 1:
            # Every unordered pair of services on the line is one edge.
            for edge in itertools.combinations(services, 2):
                edge_dict[edge] = edge_dict.get(edge, 0) + count

# output should be close to public numbers:
# https://haveibeenpwned.com/PwnedWebsites

with open('hibp_services.csv', 'wb') as services_file:
    writer = csv.writer(services_file)
    writer.writerow(["Service", "Total"])

    for key, value in service_dict.iteritems():
        writer.writerow([key, value])

with open('hibp_edges.csv', 'wb') as edges_file:
    writer = csv.writer(edges_file)
    writer.writerow(["Source", "Target", "Weight"])

    for key, value in edge_dict.iteritems():
        writer.writerow([key[0], key[1], value])
154 changes: 154 additions & 0 deletions hibp_services.csv
@@ -0,0 +1,154 @@
Service,Total
HackingTeam,32310
HackForums,191773
MinecraftPocketEditionForum,16034
Comcast,616882
Experian,7196890
000webhost,13445467
Vodafone,56021
NexusMods,11779871
WHMCS,134047
Trillian,3827238
BitTorrent,34236
Interpals,3439414
Plex,327314
QatarNationalBank,88678
HeroesOfGaia,179967
AstroPID,5789
Lifeboat,7089395
LizardSquad,13451
GeekedIn,1073164
R2Games,22231152
NetEase,234841563
126,6414191
MajorGeeks,269548
CrackCommunity,19216
WarInc,1020136
Adobe,152479936
MoDaCo,879703
SumoTorrent,285188
OwnedCore,880331
CheapAssGamer,444767
Seedpeer,281919
MailRu,4820827
HeroesOfNewerth,8089103
Minefield,188343
Tianya,29020808
Tesco,2239
GTAGaming,197184
Aipai,6496778
Mac-Torrents,93922
Duowan,2639894
COMELEC,228605
Aternos,1436486
MuslimDirectory,37782
Avast,422959
ClixSense,2424784
Dominos,648231
PixelFederation,38108
SpecialKSpamList,30741620
GamerzPlanet,1217166
Sony,37103
Nival,1535473
Yandex,1186564
SkTorrent,117070
BusinessAcumen,22747
DLH,3264710
eThekwiniMunicipality,81830
iDressup,2181565
DDO,1580933
mSpy,699792
iMesh,49467477
Dodonew,8718404
PS3Hax,447410
GameTuts,2064274
Nulled,599076
Tumblr,76067055
Gamigo,8163604
Pokebip,425445
StarNet,139395
QuantumBooter,48592
IGF,3200
Flashback,40256
Rambler,91356923
MoneyBookers,4483605
CannabisForum,227812
Leet,5081689
Teracod,97136
Verified,17009
Insanelyi,104162
ServerPact,73587
ThisHabboForum,612369
VBulletin,518966
AndroidForums,745355
Lastfm,37217679
SCDailyPhoneSpamList,32939105
Spirol,55614
Boxee,158163
WPT,148366
Uiggy,2682650
Neteller,3619948
xat,5968783
Yahoo,453427
BlackHatWorld,777387
XSplit,2983472
Lookbook,1074948
FFShrine,620677
PokemonCreed,116460
SprashivaiRu,3474763
Xbox-Scene,432552
PSX-Scene,341118
CivilOnline,7830195
Gawker,532661
BattlefieldHeroes,523
Bell,12653
MPGH,3122898
TRAI,107128
Snapchat,5
MinecraftWorldMap,71018
KMRU,1476783
TruckersMP,83643
BTSec,4789418
AhaShare,180468
17Media,4009640
LinkedIn,160074951
Malwarebytes,111623
Forbes,1057261
Win7Vista,202859
Nihonomaru,1697282
PaddyPower,590954
GFAN,22526334
LOTR,1141278
NextGenUpdate,1194597
Solomid,442166
myRepoSpace,252751
Dropbox,68648009
Onverse,800157
FoxyBingo,252216
WIIUISO,458155
UnrealEngine,530147
VK,93338601
LoungeBoard,41212
ModernBusinessSolutions,58843488
uTorrent,395044
MangaTraders,855236
Taobao,21149008
ArmyForceOnline,1531235
Neopets,26892897
AcneOrg,432943
MyVidster,19863
Hemmakvall,47297
gPotato,2136520
iPmart,2460787
BlueSnapRegpack,104977
PHPFreaks,173891
WildStar,738556
MySpace,359409507
LinuxMint,145349
Warframe,819478
Patreon,2330380
EpicGames,251661
Stratfor,862329
Hemmelig,26071
FlashFlashRevolution,1771845
BigMoneyJobs,36789

0 comments on commit c5c9071

Please sign in to comment.