In [2]:
import pandas as pd
import pandasql as pdsql
from datetime import datetime
import matplotlib
import matplotlib.pyplot as plt

def pysql(q):
    """Run the SQL query string `q` through pandasql and return its result.

    Table names used in the query are looked up in this notebook's global
    namespace (via `globals()`), so a DataFrame bound to a top-level name
    such as `data` can be queried under that same name.
    """
    return pdsql.sqldf(q, globals())


# Load the gzip-compressed CSV export into a DataFrame.
data = pd.read_csv('data/amico-export.csv.gz', compression='gzip')
# Convert the two timestamp columns from their CSV text form to datetimes.
data['date'] = pd.to_datetime(data['date'])
data['vt_query'] = pd.to_datetime(data['vt_query'])

In [3]:
# Select every APK download whose server address matches 45.77.x.x,
# ordered by download date.
sql_query = """
    SELECT *
    FROM data 
    WHERE server LIKE '45.77.%.%'
        AND type = 'APK'
    ORDER BY date;
    """

clapks = pysql(sql_query)

# Single-argument print(...) with %-formatting produces the same text under
# the Python 2 print statement and the Python 3 print function.
print("Number of malicious downloads = %d" % len(clapks))
# Series.min()/.max() return NaN on an empty result instead of raising the
# ValueError that the builtin min()/max() would raise.
print("MIN(date) = %s" % (clapks['date'].min(),))
print("MAX(date) = %s" % (clapks['date'].max(),))

clapks


Number of malicious downloads = 135
MIN(date) = 2017-08-11 00:00:00.000000
MAX(date) = 2017-12-19 00:00:00.000000


Unnamed: 0,dump_id,date,md5,host,server,type,max_tavs,max_avs,score,vt_query
0,1406348,2017-08-11 00:00:00.000000,aa69fc1075f375c1486657fa42900416,us.girlsexcellentvideo.cyxdnpqxxg,45.77.97.45,APK,,,0.430,2017-08-11 00:00:00.000000
1,1406031,2017-08-11 00:00:00.000000,4b2987db9e55c80eda7da4b9ab272909,us.analvideowonderfulxxxgirls.cyadgjgigh,45.77.97.45,APK,,,0.483,2017-08-11 00:00:00.000000
2,1406637,2017-08-12 00:00:00.000000,56f5731587a104d9bdebc4aab9eb25ae,us.videobeautifulxxxgirls.wmxvmcpg4z,45.77.97.45,APK,,,0.540,2017-08-12 00:00:00.000000
3,1408242,2017-08-14 00:00:00.000000,5acaf99fb29468e980adf0888fddfb54,online.goodxxlvideoanalgirl.syaeiuzsv6,45.77.97.45,APK,,,0.581,2017-08-14 00:00:00.000000
4,1407978,2017-08-14 00:00:00.000000,4587322f8dcf4bb4b66fc456980fedaf,us.goodxxxvideoanalgirl.cmadkmfld4,45.77.97.45,APK,,,0.636,2017-08-14 00:00:00.000000
5,1407262,2017-08-14 00:00:00.000000,7fde4b2a3a382a2f958eb633df67354c,us.videobeautifultitsgirl.cyqeicpvat,45.77.97.45,APK,,,0.519,2017-08-14 00:00:00.000000
6,1411085,2017-08-15 00:00:00.000000,d7aa90f2155cda99dd87fd4ffbbff856,online.coolvideohotgirl.shxdoyevpn,45.77.97.45,APK,,,0.548,2017-08-15 00:00:00.000000
7,1409013,2017-08-15 00:00:00.000000,84cfcedc6df33860db2c1589c1059f0b,online.titsvideoanalgirl.syqe6dyrnz,45.77.97.45,APK,,,0.562,2017-08-15 00:00:00.000000
8,1417476,2017-08-16 00:00:00.000000,99cd7f8ec8db26f6b5957477598e43c2,us.coolbestgirlvideo.cyqepb4dzh,45.77.97.45,APK,,,0.509,2017-08-16 00:00:00.000000
9,1411195,2017-08-16 00:00:00.000000,d6f27922c6c2aff57f126392f4803cbb,online.coolvideohotgirls.syxelyzgbg,45.77.97.45,APK,,,0.543,2017-08-16 00:00:00.000000


In [4]:
# Per-host summary of `clapks`: number of downloads, distinct file hashes
# (md5) and distinct server IPs for each host name, most-downloaded first.
sql_query = """
    SELECT host,
           COUNT(*) AS downloads, 
           COUNT(DISTINCT md5) AS md5s,
           COUNT(DISTINCT server) AS server_IPs
    FROM clapks 
    GROUP BY host
    ORDER BY downloads DESC, md5s DESC;
    """

clapks_stats = pysql(sql_query)
# Single-argument print(...) with %-formatting produces the same text under
# the Python 2 print statement and the Python 3 print function.
print("Number of host names = %d" % len(clapks_stats))
# Show only the first 20 hosts.
clapks_stats[:20]

Number of host names = 129


Unnamed: 0,host,downloads,md5s,server_IPs
0,us.privatehomevideobrunettegirls.wmam5ixauk,2,2,1
1,site.analvideobestgirls.wyxunj4moh,2,1,1
2,us.beautifulhomegirlvideo.cyahk6vt5i,2,1,1
3,us.privatehomevideobrunettegirls.smxh54pw4k,2,1,1
4,us.privatehomevideobrunettegirls.wmah5uzdzs,2,1,1
5,us.wonderfulhomevideobrunettegirl.syxmkocx7h,2,1,1
6,online.beautifulhomevideohotgirl.whxoessxrd,1,1,1
7,online.beautifulhomevideohotgirls.shxlvafpmh,1,1,1
8,online.bestcoolvideohotgirl.whalrasp6s,1,1,1
9,online.bestgirlssuckvideo.wmxnsbpeli,1,1,1


In [5]:
# Per-server summary of `clapks`: number of downloads, distinct file hashes
# (md5) and distinct host names for each server IP, busiest servers first.
sql_query = """
    SELECT server,
           COUNT(*) AS downloads, 
           COUNT(DISTINCT md5) AS md5s,
           COUNT(DISTINCT host) AS hosts
    FROM clapks 
    GROUP BY server
    ORDER BY downloads DESC, md5s DESC;
    """

# NOTE: this rebinds `clapks_stats`, replacing the result of the previous cell.
clapks_stats = pysql(sql_query)
clapks_stats

Unnamed: 0,server,downloads,md5s,hosts
0,45.77.118.239,80,65,74
1,45.77.97.45,50,50,50
2,45.77.86.80,4,4,4
3,45.77.93.135,1,1,1


In [6]:
# Group the `clapks` downloads by their `max_tavs` value and report, per
# group, the download count, distinct md5 count, and mean / maximum score.
# NOTE(review): `max_tavs` is presumably a count of trusted-AV detections --
# confirm against the export's schema.
sql_query = """
    SELECT max_tavs AS tavs, 
           COUNT(*) AS downloads,
           COUNT(DISTINCT md5) AS md5s,
           AVG(score), MAX(score)
    FROM clapks 
    GROUP BY max_tavs
    ORDER BY max_tavs DESC;
    """

# NOTE: this rebinds `clapks_stats`, replacing the result of the previous cell.
# In the recorded output all 135 rows land in a single group with no tavs value.
clapks_stats = pysql(sql_query)
clapks_stats

Unnamed: 0,tavs,downloads,md5s,AVG(score),MAX(score)
0,,135,120,0.420748,0.636


In [7]:
# Same breakdown as the `max_tavs` cell, but grouped by `max_avs`: download
# count, distinct md5 count, and mean / maximum score per group.
# NOTE(review): `max_avs` is presumably a count of AV detections -- confirm
# against the export's schema.
sql_query = """
    SELECT max_avs AS avs, 
           COUNT(*) AS downloads,
           COUNT(DISTINCT md5) AS md5s,
           AVG(score), MAX(score)
    FROM clapks 
    GROUP BY max_avs
    ORDER BY max_avs DESC;
    """

# NOTE: this rebinds `clapks_stats`, replacing the result of the previous cell.
# In the recorded output all 135 rows land in a single group with no avs value.
clapks_stats = pysql(sql_query)
clapks_stats

Unnamed: 0,avs,downloads,md5s,AVG(score),MAX(score)
0,,135,120,0.420748,0.636


In [8]:
# Count how many times each md5 was downloaded, restricted to rows that have
# no `max_tavs` value, most frequently downloaded hashes first.
sql_query = """
    SELECT md5, COUNT(*) AS count
    FROM clapks 
    WHERE max_tavs IS NULL
    GROUP BY md5
    ORDER BY count DESC
    """

clapks_stats = pysql(sql_query)
# Single-argument print(...) with %-formatting produces the same text under
# the Python 2 print statement and the Python 3 print function.
print("Number of MD5s = %d" % len(clapks_stats))
# Show only the ten most frequently downloaded hashes.
clapks_stats[:10]

Number of MD5s = 120


Unnamed: 0,md5,count
0,2479875ea7515e5fefe536a22632073f,4
1,38445ae40686146fa54a2b3cf3c836ea,4
2,512e55aef3b7f275d3b6da4adf2daee1,4
3,5a6329d9ae17b7cee8e50a32bb8770db,3
4,4f98c62afad27bb1063f66b3340bc0c2,2
5,7e01d1aa6207d524c163b8caf3201cfb,2
6,bb19dedcb69c3c8530a403c07463ea0c,2
7,fe8a3acb0d23c1cd40089ba104784868,2
8,03339021e35b3edd5a2a03fbeee4577e,1
9,09c0d2cf8a8da1ed3102123bf6a1717a,1


In [12]:
# Count the downloads (and distinct md5s) in `clapks` that have no `max_tavs`
# value and a score strictly above 0.5.
# NOTE(review): the `detected_*` aliases suggest 0.5 is being used as the
# detection threshold on `score` -- confirm, and consider naming the constant.
sql_query = """
    SELECT COUNT(*) AS detected_downloads, 
           COUNT(DISTINCT md5) AS detected_md5s
    FROM clapks 
    WHERE max_tavs IS NULL
        AND score > 0.5
    """

# NOTE: this rebinds `clapks_stats`, replacing the result of the previous cell.
clapks_stats = pysql(sql_query)
clapks_stats

Unnamed: 0,detected_downloads,detected_md5s
0,21,21
