# Template for all parsing
```python
@require(iter_json_gzip,'ujson','datetime','re','gzip','csv')
def DUMPNAME(file_in,out_base,compress=1):
    """Template parser: copy, rename DUMPNAME/SOMEFIELD and fill in the marked spots.

    Iterates the records yielded by ``iter_json_gzip(file_in)`` and writes the
    selected observations to a gzip-compressed CSV.

    Args:
        file_in: handed unchanged to ``iter_json_gzip``.
        out_base: format template whose ``{}`` is replaced by the dump name.
        compress: gzip compression level of the output file.

    Returns:
        The path of the CSV file that was written.
    """
    headers = [] # list of column names
    out_filename = out_base.format('DUMPNAME')

    # Things to remember as having been "seen"
    seen = set()
    running = {}
    with gzip.open(out_filename,'wt',compress) as out_file:
        csv_writer = csv.writer(out_file)
        csv_writer.writerow(headers)
        for json in iter_json_gzip(file_in):
            # get some attribute, cast to string if need be
            # what you're "remembering" can also be a tuple (not a list: lists are not hashable)
            SOMEFIELD = json.get('SOMEFIELD','')
            if isinstance(SOMEFIELD,str):
                SOMEFIELD = SOMEFIELD.strip()
                # double check that it's a valid field, non empty and not seen, etc.
                if running.get(json['app_id'],'')!=SOMEFIELD and len(SOMEFIELD)>0:
                    # SOME CODE HERE: update `running`/`seen` and build the
                    # output row `obs` (it is not defined by the template).
                    csv_writer.writerow(obs)
    return out_filename
```

# Initialize Environment

### Compatibility notes

The following code is only compatible with a Python 3.4 kernel, and the notebook must be opened with IPython 3.0+.

The following libraries must be installed to run this code:

```pip install ujson```

In [1]:
from pdb import set_trace as debug
from pandas.io.parsers import read_csv
from IPython.parallel import Client,require
from collections import Counter

In [3]:
import datetime
import re
import csv
import gzip

In [6]:
#--> DUMP_DATE tag that is embedded in every output file name (see OUT_BASE)
#--> RAW_FILE_DIR with the directory that contains apple_ios__main.json.gz
#--> OUT_DIR with the directory of where you want the processed files to go
#--> DEBUG when truthy, each debug cell runs its parser and loads the created CSV into df_debug
#--> COMPRESS_LEVEL gzip compression level. warning: severely impacts runtime.
#--> LINE_LIMIT how many lines to iterate through in the raw file. For debugging.
#    (used as the default LINE_LIMIT of iter_json_gzip; -1 presumably means "no limit" -- TODO confirm)
#--> PARALLEL when truthy, connect to the IPython.parallel engines and rebind `map` to a load-balanced map
#--> PIGZ selects which iter_json_gzip wrapper gets defined
DUMP_DATE = '2015_02_26_23_12'
RAW_FILE_DIR = '/home/cgaray/data'
OUT_DIR = '/home/cgaray/data/rui'
DEBUG = 0
COMPRESS_LEVEL = 9
LINE_LIMIT = -1
PARALLEL = 1
PIGZ = 1

In [7]:
# Don't change these!
# RAW_FILE: full path of the raw dump inside RAW_FILE_DIR.
# OUT_BASE: output path template; the remaining '{}' is filled in with the
#           parser name by each parser (out_base.format(...)).
RAW_FILE = '{}/apple_ios__main.json.gz'.format(RAW_FILE_DIR)
OUT_BASE = OUT_DIR+'/{}__apple_ios__main__'+DUMP_DATE+'.csv.gz'

In [13]:
# Define the record iterator used by every parser in this notebook. Both
# variants expose the same signature and capture the module-level LINE_LIMIT
# as the default value at definition time.
if PIGZ:
    # NOTE(review): the standard-library `gzip` module has no `iter_json_gzip`
    # attribute, so this call looks like it raises AttributeError unless `gzip`
    # is bound to a different module -- confirm which module was intended.
    @require(gzip)
    def iter_json_gzip(filename,LINE_LIMIT=LINE_LIMIT):
        return gzip.iter_json_gzip(filename,LINE_LIMIT=LINE_LIMIT)
        
else:
    # Fallback: delegate to the project's `datautils` module.
    import datautils
    @require('datautils')
    def iter_json_gzip(filename,LINE_LIMIT=LINE_LIMIT):
        return datautils.iter_json_gzip(filename,LINE_LIMIT=LINE_LIMIT)

In [14]:
# Print a few observations to inspect the raw data.
# !zcat! "$RAW_FILE"|head -n 1000000 | tail -n5

# Initialize Parallel Computing

In [15]:
if PARALLEL:
    # Connect to the IPython.parallel cluster and report how many engines answer.
    ipython_parallel = Client()
    print("{} active computing engines".format(len(ipython_parallel.ids)))

    lbv = ipython_parallel.load_balanced_view()

    # Intentionally rebinds the name `map`: code further down that calls
    # map(f, iterable) is then dispatched to the engines (non-blocking,
    # results in completion order) instead of running serially.
    def map(f,itertable):
        return lbv.map(f,itertable,block=False,ordered=False)

    @require('socket')
    def host(dummy):
        # Runs on an engine; the argument only exists so the function can be mapped.
        return socket.gethostname()

    # One task per engine id; each task reports the host name it ran on.
    hostnames = list(lbv.map(host,ipython_parallel.ids))
    # Keep only the number that follows 'equity' in each host name.
    nodes = [int(name.split('equity')[1].split(".")[0]) for name in hostnames]

    # (node number, number of tasks answered from that node), sorted by node.
    print(sorted(Counter(nodes).items()))

36 active computing engines
[(2, 4), (6, 4), (8, 4), (12, 6), (13, 6), (17, 12)]


# NEWWEBSITE

In [8]:
@require(iter_json_gzip,gzip,datetime,re,csv)
def NEWWEBSITE(file_in,out_base,compress=1):
    """Log every change of an app's website host to a gzip-compressed CSV.

    For each record from ``iter_json_gzip(file_in)`` whose ``websiteUrl`` is a
    string, the URL is reduced to a host: 'http://' / 'https://' are removed,
    the text is lower-cased, a leading 'www.' is dropped, only the part before
    the first '/' is kept, and the text captured after the first '.<letters>'
    run is removed. A row ``(app_id, date, url)`` is written whenever that host
    is non-empty and differs from the last one written for the same app.

    Args:
        file_in: handed unchanged to ``iter_json_gzip``.
        out_base: format template; its ``{}`` is replaced by 'NEWWEBSITE'.
        compress: gzip compression level of the output file.

    Returns:
        The path of the CSV file that was written.
    """
    out_filename = out_base.format('NEWWEBSITE')
    scheme_re = re.compile(r"(http://)|(https://)")
    www_re = re.compile(r"^www\.")
    # NOTE(review): the capture group is everything after the first
    # '.<letters>' run, so 'a.b.c' becomes 'a.b' -- confirm that truncating
    # hosts with more than two labels is intended.
    tail_re = re.compile(r"\.[a-z]+(.*?)$")
    last_written = {}
    with gzip.open(out_filename,'wt',compress) as out_file:
        writer = csv.writer(out_file)
        writer.writerow(['app_id','date','url'])
        for record in iter_json_gzip(file_in):
            url = record.get('websiteUrl','')
            if not isinstance(url,str):
                continue
            host = www_re.sub('',scheme_re.sub('',url).lower())
            host = host.split(r"/")[0]
            tails = tail_re.findall(host)
            if tails:
                host = host.replace(tails[0],'')
            app_id = record['app_id']
            if last_written.get(app_id,'')==host or not host:
                continue
            last_written[app_id] = host
            stamp = datetime.datetime.fromtimestamp(int(record['timestamp']))
            writer.writerow((app_id,stamp.strftime('%Y-%m-%d').strip(),host))
    return out_filename

In [9]:
# Debug preview: only when DEBUG is truthy, run NEWWEBSITE on the raw dump and
# load its gzip CSV into df_debug; the bare name below is the cell's displayed value.
df_debug = None
if DEBUG:
    df_debug = read_csv(NEWWEBSITE(RAW_FILE,OUT_BASE,COMPRESS_LEVEL),compression='gzip',header=0)
df_debug

# NEWSUPPORTURL

In [10]:
@require(iter_json_gzip,gzip,datetime,re,csv)
def NEWSUPPORTURL(file_in,out_base,compress=1):
    """Log every change of an app's support-URL host to a gzip-compressed CSV.

    For each record from ``iter_json_gzip(file_in)`` whose ``supportUrl`` is a
    string, the URL is reduced to a host: 'http://' / 'https://' are removed,
    the text is lower-cased, a leading 'www.' is dropped, only the part before
    the first '/' is kept, and the text captured after the first '.<letters>'
    run is removed. A row ``(app_id, date, support_url)`` is written whenever
    that host is non-empty and differs from the last one written for the app.

    Args:
        file_in: handed unchanged to ``iter_json_gzip``.
        out_base: format template; its ``{}`` is replaced by 'NEWSUPPORTURL'.
        compress: gzip compression level of the output file.

    Returns:
        The path of the CSV file that was written.
    """
    out_filename = out_base.format('NEWSUPPORTURL')
    scheme_re = re.compile(r"(http://)|(https://)")
    www_re = re.compile(r"^www\.")
    # NOTE(review): the capture group is everything after the first
    # '.<letters>' run, so 'a.b.c' becomes 'a.b' -- confirm that truncating
    # hosts with more than two labels is intended.
    tail_re = re.compile(r"\.[a-z]+(.*?)$")
    last_written = {}
    with gzip.open(out_filename,'wt',compress) as out_file:
        writer = csv.writer(out_file)
        writer.writerow(['app_id','date','support_url'])
        for record in iter_json_gzip(file_in):
            url = record.get('supportUrl','')
            if not isinstance(url,str):
                continue
            host = www_re.sub('',scheme_re.sub('',url).lower())
            host = host.split(r"/")[0]
            tails = tail_re.findall(host)
            if tails:
                host = host.replace(tails[0],'')
            app_id = record['app_id']
            if last_written.get(app_id,'')==host or not host:
                continue
            last_written[app_id] = host
            stamp = datetime.datetime.fromtimestamp(int(record['timestamp']))
            writer.writerow((app_id,stamp.strftime('%Y-%m-%d').strip(),host))
    return out_filename

In [11]:
# Debug preview: only when DEBUG is truthy, run NEWSUPPORTURL on the raw dump and
# load its gzip CSV into df_debug; the bare name below is the cell's displayed value.
df_debug = None
if DEBUG:
    df_debug = read_csv(NEWSUPPORTURL(RAW_FILE,OUT_BASE,COMPRESS_LEVEL),compression='gzip',header=0)
df_debug

# NEWSIMILAR5

In [12]:
@require(iter_json_gzip,gzip,datetime,re,csv)
def NEWSIMILAR5(file_in,out_base,compress=1,number_similar = 5):
    """Log every change in an app's first ``number_similar`` related apps.

    For each record from ``iter_json_gzip(file_in)`` with a non-empty
    ``customersAlsoBoughtApps`` value, the first ``number_similar`` entries
    are converted to strings and sorted. A row ``(app_id, date, related...)``
    is written whenever that sorted list differs from the last one written for
    the same app. Rows hold fewer related columns than the header when the
    record lists fewer entries.

    Args:
        file_in: handed unchanged to ``iter_json_gzip``.
        out_base: format template; its ``{}`` is replaced by
            'NEWSIMILAR<number_similar>'.
        compress: gzip compression level of the output file.
        number_similar: how many leading related apps to keep per record.

    Returns:
        The path of the CSV file that was written.
    """
    headers = ['app_id','date'] + ['related{}'.format(x)
                                   for x in range(1,number_similar+1)]
    out_filename = out_base.format('NEWSIMILAR{}'.format(number_similar))
    running = {}
    # Use the caller-supplied compression level rather than a fixed one.
    with gzip.open(out_filename,'wt',compress) as out_file:
        csv_writer = csv.writer(out_file)
        csv_writer.writerow(headers)
        for json in iter_json_gzip(file_in):
            related = json.get('customersAlsoBoughtApps',[])
            if not related:
                continue
            # A generator expression instead of map(): the notebook rebinds
            # the global name `map` to a cluster map when PARALLEL is set.
            related = sorted(str(x) for x in related[:number_similar])
            joined = ",".join(related)
            if running.get(json['app_id'],'')==joined:
                continue
            running[json['app_id']] = joined
            day = datetime.datetime.fromtimestamp(int(json['timestamp']))\
                    .strftime('%Y-%m-%d')
            csv_writer.writerow([json['app_id'],day]+related)
    return out_filename

In [13]:
# Debug preview: only when DEBUG is truthy, run NEWSIMILAR5 on the raw dump and
# load its gzip CSV into df_debug; the bare name below is the cell's displayed value.
df_debug = None
if DEBUG:
    df_debug = read_csv(NEWSIMILAR5(RAW_FILE,OUT_BASE,COMPRESS_LEVEL),compression='gzip',header=0)
df_debug

# NEWSIMILAR10

In [14]:
@require(NEWSIMILAR5)
def NEWSIMILAR10(RAW_FILE,OUT_BASE,COMPRESS_LEVEL):
    """Run NEWSIMILAR5 keeping the first 10 related apps; returns its output path."""
    keep = 10
    return NEWSIMILAR5(RAW_FILE,OUT_BASE,COMPRESS_LEVEL,keep)

In [15]:
# Debug preview: only when DEBUG is truthy, run NEWSIMILAR10 on the raw dump and
# load its gzip CSV into df_debug; the bare name below is the cell's displayed value.
df_debug = None
if DEBUG:
    df_debug = read_csv(NEWSIMILAR10(RAW_FILE,OUT_BASE,COMPRESS_LEVEL),compression='gzip',header=0)
df_debug

# NEWSIMILAR15

In [16]:
@require(NEWSIMILAR5)
def NEWSIMILAR15(RAW_FILE,OUT_BASE,COMPRESS_LEVEL):
    """Run NEWSIMILAR5 keeping the first 15 related apps; returns its output path."""
    keep = 15
    return NEWSIMILAR5(RAW_FILE,OUT_BASE,COMPRESS_LEVEL,keep)

In [17]:
# Debug preview: only when DEBUG is truthy, run NEWSIMILAR15 on the raw dump and
# load its gzip CSV into df_debug; the bare name below is the cell's displayed value.
df_debug = None
if DEBUG:
    df_debug = read_csv(NEWSIMILAR15(RAW_FILE,OUT_BASE,COMPRESS_LEVEL),compression='gzip',header=0)
df_debug

# DAILYRATINGS

In [18]:
@require(iter_json_gzip,gzip,datetime,re,csv)
def DAILYRATINGS(file_in,out_base,compress=1):
    """Write each distinct (app, day, rating counts) observation once.

    For each record from ``iter_json_gzip(file_in)`` with a non-empty
    ``ratingCountList``, a row ``(app_id, date, *ratingCountList)`` is written
    unless the same app/day/counts combination was already written.

    Args:
        file_in: handed unchanged to ``iter_json_gzip``.
        out_base: format template; its ``{}`` is replaced by 'DAILYRATINGS'.
        compress: gzip compression level of the output file.

    Returns:
        The path of the CSV file that was written.
    """
    headers = ['app_id','date','rating5','rating4','rating3','rating2','rating1']
    out_filename = out_base.format('DAILYRATINGS')
    # Only the hash of each observation is kept (not the tuple itself).
    # NOTE(review): presumably a memory optimisation; a hash collision would
    # silently drop a row.
    seen_hashes = set()
    # Use the caller-supplied compression level rather than a fixed one.
    with gzip.open(out_filename,'wt',compress) as out_file:
        csv_writer = csv.writer(out_file)
        csv_writer.writerow(headers)
        for json in iter_json_gzip(file_in):
            day = datetime.datetime.fromtimestamp(int(json['timestamp']))\
                                .strftime('%Y-%m-%d')
            rating = json.get('ratingCountList',None)
            if not rating:
                continue
            key = hash((json['app_id'],day,tuple(rating)))
            if key in seen_hashes:
                continue
            seen_hashes.add(key)
            csv_writer.writerow([json['app_id'],day]+list(rating))
    return out_filename

In [19]:
# Debug preview: only when DEBUG is truthy, run DAILYRATINGS on the raw dump and
# load its gzip CSV into df_debug; the bare name below is the cell's displayed value.
df_debug = None
if DEBUG:
    df_debug = read_csv(DAILYRATINGS(RAW_FILE,OUT_BASE,COMPRESS_LEVEL),compression='gzip',header=0)
df_debug

# DAILYRATINGSCURRENT

In [20]:
@require(iter_json_gzip,gzip,datetime,re,csv)
def DAILYRATINGSCURRENT(file_in,out_base,compress=1):
    """Write each distinct (app, day, current rating counts) observation once.

    For each record from ``iter_json_gzip(file_in)`` with a non-empty
    ``ratingCountList_current``, a row ``(app_id, date, *counts)`` is written
    unless the same app/day/counts combination was already written.

    Args:
        file_in: handed unchanged to ``iter_json_gzip``.
        out_base: format template; its ``{}`` is replaced by
            'DAILYRATINGSCURRENT'.
        compress: gzip compression level of the output file.

    Returns:
        The path of the CSV file that was written.
    """
    out_filename = out_base.format('DAILYRATINGSCURRENT')
    # Hashes of the observations already written (the tuples are not kept).
    seen_hashes = set()
    with gzip.open(out_filename,'wt',compress) as out_file:
        writer = csv.writer(out_file)
        writer.writerow(['app_id','date','rating5','rating4','rating3','rating2','rating1'])
        for record in iter_json_gzip(file_in):
            stamp = datetime.datetime.fromtimestamp(int(record['timestamp']))
            day = stamp.strftime('%Y-%m-%d')
            counts = record.get('ratingCountList_current',None)
            if not counts:
                continue
            fingerprint = hash((record['app_id'],day,tuple(counts)))
            if fingerprint in seen_hashes:
                continue
            seen_hashes.add(fingerprint)
            writer.writerow([record['app_id'],day]+counts)
    return out_filename

In [21]:
# Debug preview: only when DEBUG is truthy, run DAILYRATINGSCURRENT on the raw dump and
# load its gzip CSV into df_debug; the bare name below is the cell's displayed value.
# Will be empty because this variable wasn't scraped for the first couple million observations
df_debug = None
if DEBUG:
    df_debug = read_csv(DAILYRATINGSCURRENT(RAW_FILE,OUT_BASE,COMPRESS_LEVEL),compression='gzip',header=0)
df_debug

# WEEKLYRATINGS

In [22]:
@require(iter_json_gzip,gzip,datetime,re,csv)
def WEEKLYRATINGS(file_in,out_base,compress=1):
    """Write the first rating counts seen for each app in each ISO week.

    For each record from ``iter_json_gzip(file_in)`` with a non-empty
    ``ratingCountList``, a row ``(app_id, year, week, *ratingCountList)`` is
    written the first time that (app_id, ISO year, ISO week) combination is
    encountered; later records of the same app and week are skipped.

    Args:
        file_in: handed unchanged to ``iter_json_gzip``.
        out_base: format template; its ``{}`` is replaced by 'WEEKLYRATINGS'.
        compress: gzip compression level of the output file.

    Returns:
        The path of the CSV file that was written.
    """
    headers = ['app_id','year','week','rating5','rating4','rating3','rating2','rating1']
    out_filename = out_base.format('WEEKLYRATINGS')
    # Hashes of the (app_id, year, week) keys already written.
    seen_hashes = set()
    # Use the caller-supplied compression level rather than a fixed one.
    with gzip.open(out_filename,'wt',compress) as out_file:
        csv_writer = csv.writer(out_file)
        csv_writer.writerow(headers)
        for json in iter_json_gzip(file_in):
            # isocalendar() yields (ISO year, ISO week, weekday); keep the first two.
            year,week = datetime.datetime.fromtimestamp(int(json['timestamp']))\
                                .isocalendar()[:2]
            rating = json.get('ratingCountList',None)
            if not rating:
                continue
            key = hash((json['app_id'],year,week))
            if key in seen_hashes:
                continue
            seen_hashes.add(key)
            csv_writer.writerow([json['app_id'],year,week]+list(rating))
    return out_filename

In [23]:
# Debug preview: only when DEBUG is truthy, run WEEKLYRATINGS on the raw dump and
# load its gzip CSV into df_debug; the bare name below is the cell's displayed value.
df_debug = None
if DEBUG:
    df_debug = read_csv(WEEKLYRATINGS(RAW_FILE,OUT_BASE,COMPRESS_LEVEL),compression='gzip',header=0)
df_debug

# WEEKLYRATINGSCURRENT

In [24]:
@require(iter_json_gzip,gzip,datetime,re,csv)
def WEEKLYRATINGSCURRENT(file_in,out_base,compress=1):
    """Write the first current-version rating counts seen per app and ISO week.

    For each record from ``iter_json_gzip(file_in)`` with a non-empty
    ``ratingCountList_current``, a row ``(app_id, year, week, *counts)`` is
    written the first time that (app_id, ISO year, ISO week) combination is
    encountered; later records of the same app and week are skipped.

    Args:
        file_in: handed unchanged to ``iter_json_gzip``.
        out_base: format template; its ``{}`` is replaced by
            'WEEKLYRATINGSCURRENT'.
        compress: gzip compression level of the output file.

    Returns:
        The path of the CSV file that was written.
    """
    out_filename = out_base.format('WEEKLYRATINGSCURRENT')
    # Hashes of the (app_id, year, week) keys already written.
    seen_hashes = set()
    with gzip.open(out_filename,'wt',compress) as out_file:
        writer = csv.writer(out_file)
        writer.writerow(['app_id','year','week','rating5','rating4','rating3','rating2','rating1'])
        for record in iter_json_gzip(file_in):
            stamp = datetime.datetime.fromtimestamp(int(record['timestamp']))
            # isocalendar() yields (ISO year, ISO week, weekday); keep the first two.
            year,week = stamp.isocalendar()[:2]
            counts = record.get('ratingCountList_current',None)
            if not counts:
                continue
            fingerprint = hash((record['app_id'],year,week))
            if fingerprint in seen_hashes:
                continue
            seen_hashes.add(fingerprint)
            writer.writerow([record['app_id'],year,week]+counts)
    return out_filename

In [25]:
# Debug preview: only when DEBUG is truthy, run WEEKLYRATINGSCURRENT on the raw dump and
# load its gzip CSV into df_debug; the bare name below is the cell's displayed value.
df_debug = None
if DEBUG:
    df_debug = read_csv(WEEKLYRATINGSCURRENT(RAW_FILE,OUT_BASE,COMPRESS_LEVEL),compression='gzip',header=0)
df_debug

# MONTHLYRATINGS

In [26]:
@require(iter_json_gzip,gzip,datetime,re,csv)
def MONTHLYRATINGS(file_in,out_base,compress=1):
    """Write the first rating counts seen for each app in each calendar month.

    For each record from ``iter_json_gzip(file_in)`` with a non-empty
    ``ratingCountList``, a row ``(app_id, year, month, *ratingCountList)`` is
    written the first time that (app_id, year, month) combination is
    encountered; later records of the same app and month are skipped.

    Args:
        file_in: handed unchanged to ``iter_json_gzip``.
        out_base: format template; its ``{}`` is replaced by 'MONTHLYRATINGS'.
        compress: gzip compression level of the output file.

    Returns:
        The path of the CSV file that was written.
    """
    headers = ['app_id','year','month','rating5','rating4','rating3','rating2','rating1']
    out_filename = out_base.format('MONTHLYRATINGS')
    # Hashes of the (app_id, year, month) keys already written.
    seen_hashes = set()
    # Use the caller-supplied compression level rather than a fixed one.
    with gzip.open(out_filename,'wt',compress) as out_file:
        csv_writer = csv.writer(out_file)
        csv_writer.writerow(headers)
        for json in iter_json_gzip(file_in):
            dateobj = datetime.datetime.fromtimestamp(int(json['timestamp']))
            year,month = dateobj.year,dateobj.month
            rating = json.get('ratingCountList',None)
            if not rating:
                continue
            key = hash((json['app_id'],year,month))
            if key in seen_hashes:
                continue
            seen_hashes.add(key)
            csv_writer.writerow([json['app_id'],year,month]+list(rating))
    return out_filename

In [27]:
# Debug preview: only when DEBUG is truthy, run MONTHLYRATINGS on the raw dump and
# load its gzip CSV into df_debug; the bare name below is the cell's displayed value.
df_debug = None
if DEBUG:
    df_debug = read_csv(MONTHLYRATINGS(RAW_FILE,OUT_BASE,COMPRESS_LEVEL),compression='gzip',header=0)
df_debug

# NEWDEVELOPER

In [28]:
@require(iter_json_gzip,gzip,datetime,re,csv)
def NEWDEVELOPER(file_in,out_base,compress=1):
    """Log every change of an app's numeric developer id.

    For each record from ``iter_json_gzip(file_in)`` the ``artist_id`` value
    is converted to a stripped string. A row ``(app_id, date, dev_id)`` is
    written whenever that string consists only of digits and differs from the
    last id written for the same app.

    Args:
        file_in: handed unchanged to ``iter_json_gzip``.
        out_base: format template; its ``{}`` is replaced by 'NEWDEVELOPER'.
        compress: gzip compression level of the output file.

    Returns:
        The path of the CSV file that was written.
    """
    out_filename = out_base.format('NEWDEVELOPER')
    last_written = {}
    with gzip.open(out_filename,'wt',compress) as out_file:
        writer = csv.writer(out_file)
        writer.writerow(['app_id','date','dev_id'])
        for record in iter_json_gzip(file_in):
            dev_id = str(record.get('artist_id','')).strip()
            app_id = record['app_id']
            if last_written.get(app_id,'')==dev_id:
                continue
            # Reject empty and non-numeric ids.
            if len(dev_id)==0 or not dev_id.isdigit():
                continue
            last_written[app_id] = dev_id
            stamp = datetime.datetime.fromtimestamp(int(record['timestamp']))
            writer.writerow((app_id,stamp.strftime('%Y-%m-%d').strip(),dev_id))
    return out_filename

In [29]:
# Debug preview: only when DEBUG is truthy, run NEWDEVELOPER on the raw dump and
# load its gzip CSV into df_debug; the bare name below is the cell's displayed value.
df_debug = None
if DEBUG:
    df_debug = read_csv(NEWDEVELOPER(RAW_FILE,OUT_BASE,COMPRESS_LEVEL),compression='gzip',header=0)
df_debug

# NEWCATEGORY

In [30]:
@require(iter_json_gzip,gzip,datetime,re,csv)
def NEWCATEGORY(file_in,out_base,compress=1):
    """Log every change in an app's first four categories.

    For each record from ``iter_json_gzip(file_in)`` the first four entries of
    ``categories`` are taken (a missing or null value counts as no
    categories). A row ``(app_id, date, category...)`` is written whenever
    their comma-joined form differs from the last one written for the same
    app. Rows hold fewer category columns than the header when the record
    lists fewer than four.

    Args:
        file_in: handed unchanged to ``iter_json_gzip``.
        out_base: format template; its ``{}`` is replaced by 'NEWCATEGORY'.
        compress: gzip compression level of the output file.

    Returns:
        The path of the CSV file that was written.
    """
    headers = ['app_id','date'] + ['category{}'.format(x)
                                   for x in range(1,5)]
    out_filename = out_base.format('NEWCATEGORY')
    running = {}
    with gzip.open(out_filename,'wt',compress) as out_file:
        csv_writer = csv.writer(out_file)
        csv_writer.writerow(headers)
        for json in iter_json_gzip(file_in):
            # `or []` also covers an explicit null, which .get()'s default
            # does not replace and which cannot be sliced.
            category = (json.get('categories') or [])[:4]
            joined = ",".join(category)
            if running.get(json['app_id'],'')==joined:
                continue
            running[json['app_id']] = joined
            day = datetime.datetime.fromtimestamp(int(json['timestamp']))\
                    .strftime('%Y-%m-%d')
            csv_writer.writerow([json['app_id'],day]+category)
    return out_filename

In [31]:
# Debug preview: only when DEBUG is truthy, run NEWCATEGORY on the raw dump and
# load its gzip CSV into df_debug; the bare name below is the cell's displayed value.
df_debug = None
if DEBUG:
    df_debug = read_csv(NEWCATEGORY(RAW_FILE,OUT_BASE,COMPRESS_LEVEL),compression='gzip',header=0)
df_debug

# NEWPRICE

In [32]:
@require(iter_json_gzip,gzip,datetime,re,csv)
def NEWPRICE(file_in,out_base,compress=1):
    """Log every change of an app's price.

    For each record from ``iter_json_gzip(file_in)`` the ``price`` value is
    converted to a string with every '$' removed. A row
    ``(app_id, date, price)`` is written whenever that string differs from the
    last one written for the same app.

    Args:
        file_in: handed unchanged to ``iter_json_gzip``.
        out_base: format template; its ``{}`` is replaced by 'NEWPRICE'.
        compress: gzip compression level of the output file.

    Returns:
        The path of the CSV file that was written.
    """
    out_filename = out_base.format('NEWPRICE')
    last_written = {}
    with gzip.open(out_filename,'wt',compress) as out_file:
        writer = csv.writer(out_file)
        writer.writerow(['app_id','date','price'])
        for record in iter_json_gzip(file_in):
            raw_price = str(record.get('price',''))
            price = raw_price.replace("$","")
            app_id = record['app_id']
            if last_written.get(app_id,'')==price:
                continue
            last_written[app_id] = price
            stamp = datetime.datetime.fromtimestamp(int(record['timestamp']))
            writer.writerow((app_id,stamp.strftime('%Y-%m-%d').strip(),price))
    return out_filename

In [33]:
# Debug preview: only when DEBUG is truthy, run NEWPRICE on the raw dump and
# load its gzip CSV into df_debug; the bare name below is the cell's displayed value.
df_debug = None
if DEBUG:
    df_debug = read_csv(NEWPRICE(RAW_FILE,OUT_BASE,COMPRESS_LEVEL),compression='gzip',header=0)
df_debug

# ALLVERSIONS

In [34]:
@require(iter_json_gzip,gzip,datetime,re,csv)
def ALLVERSIONS(file_in,out_base,compress=1):
    """Write every distinct (app, release date, version string) once.

    Each record from ``iter_json_gzip(file_in)`` may carry a ``version`` list.
    Two entry layouts are understood: the older one with ``release-date``
    ('%b %d, %Y') and ``version-string``, and the newer one with
    ``releaseDate`` ('%Y-%m-%dT%H:%M:%SZ') and ``versionString``. Entries in
    neither layout are skipped. Release notes are not exported.

    Args:
        file_in: handed unchanged to ``iter_json_gzip``.
        out_base: format template; its ``{}`` is replaced by 'ALLVERSIONS'.
        compress: gzip compression level of the output file.

    Returns:
        The path of the CSV file that was written.

    Raises:
        ValueError: from ``strptime`` when a release date does not match the
            format expected for its layout.
    """
    headers = ['app_id','date','version_string']
    out_filename = out_base.format('ALLVERSIONS')
    # Hashes of the (app_id, date, version_string) rows already written.
    unique_ver = set()
    with gzip.open(out_filename,'wt',compress) as out_file:
        csv_writer = csv.writer(out_file)
        csv_writer.writerow(headers)
        for json in iter_json_gzip(file_in):
            for each_version in json.get('version',[]):
                # need extra code because Apple changed format
                if 'release-date' in each_version:
                    date = datetime.datetime.strptime(
                        each_version.get('release-date',''),'%b %d, %Y')
                    version_string = each_version.get('version-string','')
                elif 'releaseDate' in each_version:
                    date = datetime.datetime.strptime(
                        each_version.get('releaseDate',''),'%Y-%m-%dT%H:%M:%SZ')
                    version_string = each_version.get('versionString','')
                else:
                    # Unknown layout: skip it, otherwise the values of the
                    # previous entry would be reused (or be undefined on the
                    # very first entry).
                    continue
                date = date.strftime('%Y-%m-%d')
                obs = (json['app_id'],date,version_string)
                key = hash(obs)
                if key not in unique_ver:
                    unique_ver.add(key)
                    csv_writer.writerow(obs)
    return out_filename

In [35]:
# Debug preview: only when DEBUG is truthy, run ALLVERSIONS on the raw dump and
# load its gzip CSV into df_debug; the bare name below is the cell's displayed value.
df_debug = None
if DEBUG:
    df_debug = read_csv(ALLVERSIONS(RAW_FILE,OUT_BASE,COMPRESS_LEVEL),compression='gzip',header=0)
df_debug

# NEWREQ

In [36]:
@require(iter_json_gzip,gzip,datetime,re,csv)
def NEWREQ(file_in,out_base,compress=1):
    """Log every change of an app's requirements text.

    For each record from ``iter_json_gzip(file_in)`` that has a
    ``requirements`` value, the text is split on '.'. The first piece, with
    'Compatible with ' and 'and ' removed, becomes ``requires``; the second
    piece (if any), with 'This app is optimized for ' removed, becomes
    ``optimized``. A row ``(app_id, date, requires, optimized)`` is written
    whenever the raw requirements text differs from the last one written for
    the same app.

    Args:
        file_in: handed unchanged to ``iter_json_gzip``.
        out_base: format template; its ``{}`` is replaced by 'NEWREQ'.
        compress: gzip compression level of the output file.

    Returns:
        The path of the CSV file that was written.
    """
    out_filename = out_base.format('NEWREQ')
    with gzip.open(out_filename,'wt',compress) as out_file:
        writer = csv.writer(out_file, dialect='excel')
        writer.writerow(['app_id','date','requires','optimized'])
        last_written = {}
        and_re = re.compile(r"and ")
        com_with_re = re.compile(r"Compatible with ")
        optimize_re = re.compile(r"This app is optimized for ")
        for record in iter_json_gzip(file_in):
            raw_text = record.get('requirements',None)
            if raw_text is None:
                continue
            pieces = raw_text.split(".")
            requires = and_re.sub('',com_with_re.sub('',pieces[0]).strip())
            if len(pieces)>1:
                optimized = optimize_re.sub("",pieces[1].strip())
            else:
                optimized = None
            app_id = record['app_id']
            # Change detection uses the untouched text, not the cleaned pieces.
            if last_written.get(app_id,'')==raw_text:
                continue
            last_written[app_id] = raw_text
            stamp = datetime.datetime.fromtimestamp(int(record['timestamp']))
            writer.writerow((app_id,stamp.strftime('%Y-%m-%d'),requires,optimized))
    return out_filename

In [None]:
# Debug preview: only when DEBUG is truthy, run NEWREQ on the raw dump and
# load its gzip CSV into df_debug; the bare name below is the cell's displayed value.
df_debug = None
if DEBUG:
    df_debug = read_csv(NEWREQ(RAW_FILE,OUT_BASE,COMPRESS_LEVEL),compression='gzip',header=0)
df_debug

# NEWARTIST

In [33]:
@require(iter_json_gzip,gzip,datetime,re,csv)
def NEWARTIST(file_in,out_base,compress=1):
    """Log every change of an app's artist name.

    For each record from ``iter_json_gzip(file_in)`` a row
    ``(app_id, date, artist_name)`` is written whenever ``artistName``
    differs from the last value written for the same app.

    Args:
        file_in: handed unchanged to ``iter_json_gzip``.
        out_base: format template; its ``{}`` is replaced by 'NEWARTIST'.
        compress: gzip compression level of the output file.

    Returns:
        The path of the CSV file that was written.
    """
    out_filename = out_base.format('NEWARTIST')
    last_written = {}
    with gzip.open(out_filename,'wt',compress) as out_file:
        writer = csv.writer(out_file)
        writer.writerow(['app_id','date','artist_name'])
        for record in iter_json_gzip(file_in):
            artist = record.get('artistName','')
            app_id = record['app_id']
            if last_written.get(app_id,'')==artist:
                continue
            last_written[app_id] = artist
            stamp = datetime.datetime.fromtimestamp(int(record['timestamp']))
            writer.writerow((app_id,stamp.strftime('%Y-%m-%d').strip(),artist))
    return out_filename

In [34]:
# Debug preview: only when DEBUG is truthy, run NEWARTIST on the raw dump and
# load its gzip CSV into df_debug; the bare name below is the cell's displayed value.
df_debug = None
if DEBUG:
    df_debug = read_csv(NEWARTIST(RAW_FILE,OUT_BASE,COMPRESS_LEVEL),compression='gzip',header=0)
df_debug

Unnamed: 0,app_id,date,artist_name


# NEWREVIEW

In [35]:
@require(iter_json_gzip,gzip,datetime,re,csv)
def NEWREVIEW(file_in,out_base,compress=1):
    """Log every change of an app's total review count.

    For each record from ``iter_json_gzip(file_in)`` the
    ``totalNumberOfReviews`` value is converted to a string. A row
    ``(app_id, date, total_reviews)`` is written whenever that string differs
    from the last one written for the same app.

    Args:
        file_in: handed unchanged to ``iter_json_gzip``.
        out_base: format template; its ``{}`` is replaced by 'NEWREVIEW'.
        compress: gzip compression level of the output file.

    Returns:
        The path of the CSV file that was written.
    """
    out_filename = out_base.format('NEWREVIEW')
    last_written = {}
    with gzip.open(out_filename,'wt',compress) as out_file:
        writer = csv.writer(out_file)
        writer.writerow(['app_id','date','total_reviews'])
        for record in iter_json_gzip(file_in):
            review_count = str(record.get('totalNumberOfReviews',''))
            app_id = record['app_id']
            if last_written.get(app_id,'')==review_count:
                continue
            last_written[app_id] = review_count
            stamp = datetime.datetime.fromtimestamp(int(record['timestamp']))
            writer.writerow((app_id,stamp.strftime('%Y-%m-%d').strip(),review_count))
    return out_filename

In [36]:
# Debug preview: only when DEBUG is truthy, run NEWREVIEW on the raw dump and
# load its gzip CSV into df_debug; the bare name below is the cell's displayed value.
df_debug = None
if DEBUG:
    df_debug = read_csv(NEWREVIEW(RAW_FILE,OUT_BASE,COMPRESS_LEVEL),compression='gzip',header=0)
df_debug

Unnamed: 0,app_id,date,total_reviews
0,281656475,2010-09-06,446
1,281704574,2010-09-06,12689
2,281736535,2010-09-06,2862
3,281790044,2010-09-06,1464
4,281796108,2010-09-06,1548
5,281816692,2010-09-06,934
6,281826146,2010-09-06,222
7,281861187,2010-09-06,15
8,281889893,2010-09-06,198
9,281893011,2010-09-06,698


# NEWSELLER

In [37]:
@require(iter_json_gzip,gzip,datetime,re,csv)
def NEWSELLER(file_in,out_base,compress=1):
    """Log every change of an app's seller.

    For each record from ``iter_json_gzip(file_in)`` the ``seller`` value is
    formatted as a string. A row ``(app_id, date, seller)`` is written
    whenever that string differs from the last one written for the same app;
    the first record of every app is always written.

    Args:
        file_in: handed unchanged to ``iter_json_gzip``.
        out_base: format template; its ``{}`` is replaced by 'NEWSELLER'.
        compress: gzip compression level of the output file.

    Returns:
        The path of the CSV file that was written.

    Raises:
        KeyError: when a record has no ``seller`` key.
    """
    out_filename = out_base.format('NEWSELLER')
    last_written = {}
    with gzip.open(out_filename,'wt',compress) as out_file:
        writer = csv.writer(out_file)
        writer.writerow(['app_id','date','seller'])
        for record in iter_json_gzip(file_in):
            seller = '{}'.format(record['seller'])
            app_id = record['app_id']
            # Default None never equals a formatted string, so an app's first
            # record always produces a row.
            if last_written.get(app_id,None)==seller:
                continue
            last_written[app_id] = seller
            stamp = datetime.datetime.fromtimestamp(int(record['timestamp']))
            writer.writerow((app_id,stamp.strftime('%Y-%m-%d').strip(),seller))
    return out_filename

In [38]:
# Debug preview: only when DEBUG is truthy, run NEWSELLER on the raw dump and
# load its gzip CSV into df_debug; the bare name below is the cell's displayed value.
df_debug = None
if DEBUG:
    df_debug = read_csv(NEWSELLER(RAW_FILE,OUT_BASE,COMPRESS_LEVEL),compression='gzip',header=0)
df_debug

Unnamed: 0,app_id,date,seller
0,281656475,2010-09-06,Namco Networks America
1,281704574,2010-09-06,AOL
2,281736535,2010-09-06,Pangea Software Inc.
3,281790044,2010-09-06,WHERE Inc.
4,281796108,2010-09-06,Evernote
5,281816692,2010-09-06,Handmark Inc.
6,281826146,2010-09-06,salesforce.com
7,281861187,2010-09-06,Hudson Entertainment
8,281889893,2010-09-06,Hudson Entertainment
9,281893011,2010-09-06,Hudson Entertainment


# Run them all!

In [None]:
# NEWWEBSITE
# NEWSUPPORTURL
# NEWSIMILAR5
# NEWSIMILAR10
# NEWSIMILAR15
# DAILYRATINGS
# DAILYRATINGSCURRENT
# WEEKLYRATINGS
# WEEKLYRATINGSCURRENT
# MONTHLYRATINGS
# NEWDEVELOPER
# NEWCATEGORY
# NEWPRICE
# ALLVERSIONS
# NEWREQ
# Names of the parser functions to execute in this run, one per line.
programs = """
NEWARTIST
NEWREVIEW
NEWSELLER
""".split()

def run_function(f,RAW_FILE=RAW_FILE,OUT_BASE=OUT_BASE,COMPRESS_LEVEL=COMPRESS_LEVEL):
    """Call parser ``f`` with the notebook's paths and compression level.

    The defaults are bound when this function is defined, so ``f`` is the only
    argument that has to be supplied by ``map``. Returns whatever ``f``
    returns (the parsers return their output file path).
    """
    out_path = f(RAW_FILE,OUT_BASE,COMPRESS_LEVEL)
    return out_path

# Look the selected parsers up by name, run them through `map`, and print each
# result as it is yielded.
selected_parsers = [globals()[name] for name in programs]
for file in map(run_function,selected_parsers):
    print(file)

In [None]:
# Marker cell: prints once execution has reached the end of the notebook.
print("DONE")