diff --git a/hibp_service_edges.py b/hibp_service_edges.py index 86be9bb..1488f3d 100644 --- a/hibp_service_edges.py +++ b/hibp_service_edges.py @@ -1,34 +1,52 @@ import csv import itertools import json -from collections import Counter input_path = "/Users/maxwoolf/Downloads" \ "/HIBP Consolidated and Anonymised Data" \ "/HIBP Consolidated and Anonymised Data.txt" +# service_dict is a dictionary with a string key. +service_dict = {} + # edge_dict is a dictionary with tuple keys. edge_dict = {} +with open(input_path, 'rb') as f: + for entry in f: + services = entry.split(' ')[0].split(';') + count = int(entry.split(' ')[1].rstrip()) + + services.sort() # Ensure edges are in correct order + + for service in services: + if service in service_dict: + service_dict[service] += count + else: + service_dict[service] = count + + if len(services) > 1: + # edges is a list of tuples + edges = list(itertools.combinations(services, 2)) + for edge in edges: + if edge in edge_dict: + edge_dict[edge] += count + else: + edge_dict[edge] = count + +# output should be close to public numbers: +# https://haveibeenpwned.com/PwnedWebsites + +with open('hibp_services.csv', 'wb') as file: + writer = csv.writer(file) + writer.writerow(["Service", "Total"]) + + for key, value in service_dict.iteritems(): + writer.writerow([key, value]) + with open('hibp_edges.csv', 'wb') as file: writer = csv.writer(file) writer.writerow(["Source", "Target", "Weight"]) - with open(input_path, 'rb') as f: - for entry in f: - if entry.find(';') > 0: - services = entry.split(' ')[0].split(';') - count = int(entry.split(' ')[1].rstrip()) - - services.sort() # Ensure edges are in correct order - - # edges is a list of tuples - edges = list(itertools.combinations(services, 2)) - for edge in edges: - if edge in edge_dict: - edge_dict[edge] += count - else: - edge_dict[edge] = count - for key, value in edge_dict.iteritems(): writer.writerow([key[0], key[1], value]) diff --git a/hibp_services.csv b/hibp_services.csv new file mode 100644 index 0000000..4a6f288 --- /dev/null +++ b/hibp_services.csv @@ -0,0 +1,154 @@ +Service,Total +HackingTeam,32310 +HackForums,191773 +MinecraftPocketEditionForum,16034 +Comcast,616882 +Experian,7196890 +000webhost,13445467 +Vodafone,56021 +NexusMods,11779871 +WHMCS,134047 +Trillian,3827238 +BitTorrent,34236 +Interpals,3439414 +Plex,327314 +QatarNationalBank,88678 +HeroesOfGaia,179967 +AstroPID,5789 +Lifeboat,7089395 +LizardSquad,13451 +GeekedIn,1073164 +R2Games,22231152 +NetEase,234841563 +126,6414191 +MajorGeeks,269548 +CrackCommunity,19216 +WarInc,1020136 +Adobe,152479936 +MoDaCo,879703 +SumoTorrent,285188 +OwnedCore,880331 +CheapAssGamer,444767 +Seedpeer,281919 +MailRu,4820827 +HeroesOfNewerth,8089103 +Minefield,188343 +Tianya,29020808 +Tesco,2239 +GTAGaming,197184 +Aipai,6496778 +Mac-Torrents,93922 +Duowan,2639894 +COMELEC,228605 +Aternos,1436486 +MuslimDirectory,37782 +Avast,422959 +ClixSense,2424784 +Dominos,648231 +PixelFederation,38108 +SpecialKSpamList,30741620 +GamerzPlanet,1217166 +Sony,37103 +Nival,1535473 +Yandex,1186564 +SkTorrent,117070 +BusinessAcumen,22747 +DLH,3264710 +eThekwiniMunicipality,81830 +iDressup,2181565 +DDO,1580933 +mSpy,699792 +iMesh,49467477 +Dodonew,8718404 +PS3Hax,447410 +GameTuts,2064274 +Nulled,599076 +Tumblr,76067055 +Gamigo,8163604 +Pokebip,425445 +StarNet,139395 +QuantumBooter,48592 +IGF,3200 +Flashback,40256 +Rambler,91356923 +MoneyBookers,4483605 +CannabisForum,227812 +Leet,5081689 +Teracod,97136 +Verified,17009 +Insanelyi,104162 +ServerPact,73587 +ThisHabboForum,612369 +VBulletin,518966 +AndroidForums,745355 +Lastfm,37217679 +SCDailyPhoneSpamList,32939105 +Spirol,55614 +Boxee,158163 +WPT,148366 +Uiggy,2682650 +Neteller,3619948 +xat,5968783 +Yahoo,453427 +BlackHatWorld,777387 +XSplit,2983472 +Lookbook,1074948 +FFShrine,620677 +PokemonCreed,116460 +SprashivaiRu,3474763 +Xbox-Scene,432552 +PSX-Scene,341118 +CivilOnline,7830195 +Gawker,532661 +BattlefieldHeroes,523 +Bell,12653 +MPGH,3122898 +TRAI,107128 +Snapchat,5 +MinecraftWorldMap,71018 +KMRU,1476783 +TruckersMP,83643 +BTSec,4789418 +AhaShare,180468 +17Media,4009640 +LinkedIn,160074951 +Malwarebytes,111623 +Forbes,1057261 +Win7Vista,202859 +Nihonomaru,1697282 +PaddyPower,590954 +GFAN,22526334 +LOTR,1141278 +NextGenUpdate,1194597 +Solomid,442166 +myRepoSpace,252751 +Dropbox,68648009 +Onverse,800157 +FoxyBingo,252216 +WIIUISO,458155 +UnrealEngine,530147 +VK,93338601 +LoungeBoard,41212 +ModernBusinessSolutions,58843488 +uTorrent,395044 +MangaTraders,855236 +Taobao,21149008 +ArmyForceOnline,1531235 +Neopets,26892897 +AcneOrg,432943 +MyVidster,19863 +Hemmakvall,47297 +gPotato,2136520 +iPmart,2460787 +BlueSnapRegpack,104977 +PHPFreaks,173891 +WildStar,738556 +MySpace,359409507 +LinuxMint,145349 +Warframe,819478 +Patreon,2330380 +EpicGames,251661 +Stratfor,862329 +Hemmelig,26071 +FlashFlashRevolution,1771845 +BigMoneyJobs,36789