In [3]:
import ijson 
import statistics
import re

## Queries

In [6]:
def printYearlyUserNum(list):
    """Print how many users registered in each year.

    The year of a user is the first run of digits found in its
    "registered" field. Years are printed in the order in which they
    are first encountered, one "year: count" line per year.

    Args:
        list: iterable of user dicts, each with a "registered" string.
    """
    # year (as a string of digits) -> number of users registered that year
    registrations_per_year = {}
    for user in list:
        first_digits = re.search(r'\d+', user["registered"])
        year = first_digits.group()
        registrations_per_year[year] = registrations_per_year.get(year, 0) + 1

    print('Number of users registered in each year:')
    for year, total in registrations_per_year.items():
        print("%s: %s" % (year, total))

In [7]:
def printMedianNumOfFriends(list):
    """Print the median size of the users' "friends" collections.

    Args:
        list: iterable of user dicts, each with a sized "friends" value.

    Raises:
        statistics.StatisticsError: if no users are given (raised after
            the heading has already been printed).
    """
    friend_counts = [len(user["friends"]) for user in list]
    print("Median for Number of Friends")
    print(statistics.median(friend_counts))

In [26]:
def printMedianAge(list):
    """Print the median of the users' "age" values.

    Args:
        list: iterable of user dicts, each with a numeric "age".

    Raises:
        statistics.StatisticsError: if no users are given (raised after
            the heading has already been printed).
    """
    user_ages = [user["age"] for user in list]
    print("Median Age of Users")
    median_age = statistics.median(user_ages)
    print(median_age)

In [30]:
def printMeanBalance(list):
    """Print the arithmetic mean of the users' balances.

    Each "balance" string is reduced to its digits and decimal points
    (currency symbols, thousands separators and any other characters are
    removed) and parsed as a float before averaging.

    Args:
        list: iterable of user dicts, each with a "balance" string.

    Raises:
        statistics.StatisticsError: if no users are given (raised after
            the heading has already been printed).
        ValueError: if a balance has no parsable number left after
            stripping.
    """
    # NOTE(review): the pattern also removes a leading '-', so a negative
    # balance would be averaged as positive -- confirm balances are never
    # negative in the input data.
    balances = [float(re.sub(r'[^\d.]', '', user["balance"])) for user in list]
    # The heading used to say "Median" although the value printed is the mean.
    print("Mean of User Balance Amount")
    print(statistics.mean(balances))
            

In [10]:
def printMeanNumOf_ActiveFemaleUnreads(list):
    """Print the mean unread-message count over active female users.

    A user is included when its "isActive" value equals True and its
    "gender" is "female". The unread count is the first run of digits
    found in the user's "greeting" string.

    Args:
        list: iterable of user dicts with "isActive", "gender" and (for
            the users that are included) "greeting" entries.

    Raises:
        statistics.StatisticsError: if no user qualifies (raised after
            the heading has already been printed).
    """
    unread_counts = [
        int(re.search(r'\d+', user["greeting"]).group())
        for user in list
        # Explicit comparison with True is kept on purpose: merely truthy
        # values such as non-empty strings must not count as active.
        if user["isActive"] == True and user["gender"] == "female"
    ]
    print("Mean for number of Unread messages for Active females")
    print(statistics.mean(unread_counts))

## JSON Streaming

In [37]:
def jsonStream(file, count, cur_list, all_list, batch_size=1000):
    """Stream user records out of a JSON file and report statistics per batch.

    The file is parsed incrementally with ``ijson.parse``. Every object
    that is a direct element of the top-level array (prefix ``item``) is
    rebuilt as a dict and appended to both ``cur_list`` and ``all_list``.
    Each time the running ``count`` reaches a multiple of ``batch_size``
    the five report functions are run on the current batch and a fresh
    batch list is started.

    Only part of each record is reconstructed:
      * for every key except "friends", the value stored is the value of
        the parser event(s) whose prefix ends with that key, so scalar
        fields are kept as-is while nested containers are not rebuilt;
      * "friends" becomes a list of ``{"name": ...}`` dicts, one per
        ``friends.item.name`` value.

    Args:
        file: path of the JSON file to read.
        count: number of users already processed before this call; lets
            batch numbering continue across several files.
        cur_list: users of the batch currently being filled (may be
            non-empty when continuing from a previous file). It is
            appended to in place until the first batch completes.
        all_list: accumulator that receives every user; mutated in place.
        batch_size: number of users per intermediate report (default 1000).

    Returns:
        Tuple ``(count, cur_list, all_list)``: the updated running count,
        the users of the still-incomplete batch (not yet reported), and
        the accumulator.

    Raises:
        ValueError: if ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    curJson = {}
    curKey = ''
    # ijson expects a binary file object; handing it a text-mode handle is
    # deprecated and adds a decode/encode round trip.
    with open(file, 'rb') as f:
        for prefix, event, value in ijson.parse(f):
            if (prefix, event) == ('item', 'end_map'):
                # A top-level user object is complete.
                cur_list.append(curJson)
                all_list.append(curJson)
                count += 1
                curJson = {}
                # Report as soon as the batch fills up instead of waiting
                # for the next parser event to arrive.
                if count > 0 and count % batch_size == 0:
                    print("User" + str(count - batch_size + 1) + " to " + str(count) + ":")
                    printYearlyUserNum(cur_list)
                    printMedianNumOfFriends(cur_list)
                    printMedianAge(cur_list)
                    printMeanBalance(cur_list)
                    printMeanNumOf_ActiveFemaleUnreads(cur_list)
                    cur_list = []
            elif (prefix, event) == ('item', 'map_key'):
                # Keys of the user object itself; keys of nested objects
                # carry a longer prefix and therefore never land here.
                curKey = value
                if value == 'friends':
                    curJson[curKey] = []
            elif prefix.endswith('.' + curKey) and curKey != 'friends':
                curJson[curKey] = value
            elif prefix.endswith('.friends.item.name'):
                curJson[curKey].append({'name': value})
    return count, cur_list, all_list

## Run

In [43]:
# Stream both input files in order. The running count and any incomplete
# batch are carried from the first file into the second, so the per-batch
# reports keep counting across the file boundary.
count, cur_list, all_list = 0, [], []
for _path in ('./data/users-1.json', './data/users-2.json'):
    count, cur_list, all_list = jsonStream(_path, count, cur_list, all_list)

# Final report: the same statistics computed over every user that was read.
print("The Summary is : ")
for _report in (
    printYearlyUserNum,
    printMedianNumOfFriends,
    printMedianAge,
    printMeanBalance,
    printMeanNumOf_ActiveFemaleUnreads,
):
    _report(all_list)


User1 to 1000:
Number of users registered in each year:
2012: 149
2013: 176
2017: 151
2015: 171
2016: 170
2014: 169
2018: 14
Median for Number of Friends
6.0
Median Age of Users
35.0
Median of User Balance Amount
5101.74961
Mean for number of Unread messages for Active females
47.88340807174888
User1001 to 2000:
Number of users registered in each year:
2017: 163
2018: 20
2012: 173
2015: 152
2016: 173
2013: 180
2014: 139
Median for Number of Friends
6.0
Median Age of Users
35.0
Median of User Balance Amount
5276.89888
Mean for number of Unread messages for Active females
49.66101694915254
User2001 to 3000:
Number of users registered in each year:
2015: 157
2014: 167
2016: 167
2017: 159
2012: 165
2013: 160
2018: 25
Median for Number of Friends
6.0
Median Age of Users
35.0
Median of User Balance Amount
5210.21453
Mean for number of Unread messages for Active females
51.67078189300412
User3001 to 4000:
Number of users registered in each year:
2015: 163
2017: 162
2016: 166
2012: 166
2013: 1

User28001 to 29000:
Number of users registered in each year:
2012: 162
2015: 167
2014: 181
2013: 172
2017: 146
2016: 150
2018: 22
Median for Number of Friends
6.0
Median Age of Users
35.0
Median of User Balance Amount
5236.02313
Mean for number of Unread messages for Active females
48.60076045627376
User29001 to 30000:
Number of users registered in each year:
2015: 171
2016: 162
2013: 176
2017: 134
2012: 190
2014: 150
2018: 17
Median for Number of Friends
6.0
Median Age of Users
35.0
Median of User Balance Amount
5223.73094
Mean for number of Unread messages for Active females
49.118852459016395
User30001 to 31000:
Number of users registered in each year:
2016: 165
2014: 153
2012: 153
2015: 175
2013: 173
2017: 155
2018: 26
Median for Number of Friends
6.0
Median Age of Users
36.0
Median of User Balance Amount
5072.37238
Mean for number of Unread messages for Active females
50.85098039215686
User31001 to 32000:
Number of users registered in each year:
2015: 159
2012: 167
2016: 169
2017:

User56001 to 57000:
Number of users registered in each year:
2012: 151
2015: 179
2016: 166
2017: 173
2014: 168
2013: 140
2018: 23
Median for Number of Friends
5.0
Median Age of Users
35.0
Median of User Balance Amount
5209.67011
Mean for number of Unread messages for Active females
48.408396946564885
User57001 to 58000:
Number of users registered in each year:
2017: 162
2015: 198
2014: 130
2018: 28
2013: 151
2016: 164
2012: 167
Median for Number of Friends
5.0
Median Age of Users
35.0
Median of User Balance Amount
5191.17113
Mean for number of Unread messages for Active females
48.85140562248996
User58001 to 59000:
Number of users registered in each year:
2014: 157
2016: 177
2017: 153
2015: 152
2012: 166
2013: 172
2018: 23
Median for Number of Friends
6.0
Median Age of Users
35.0
Median of User Balance Amount
5271.67492
Mean for number of Unread messages for Active females
47.1025641025641
User59001 to 60000:
Number of users registered in each year:
2013: 171
2014: 161
2012: 161
2015: 

User84001 to 85000:
Number of users registered in each year:
2016: 163
2013: 188
2012: 168
2017: 155
2015: 154
2014: 144
2018: 28
Median for Number of Friends
5.0
Median Age of Users
35.0
Median of User Balance Amount
5112.47163
Mean for number of Unread messages for Active females
50.79746835443038
User85001 to 86000:
Number of users registered in each year:
2013: 154
2017: 172
2016: 141
2015: 179
2012: 154
2014: 177
2018: 23
Median for Number of Friends
5.0
Median Age of Users
35.0
Median of User Balance Amount
5254.35765
Mean for number of Unread messages for Active females
45.75092936802974
User86001 to 87000:
Number of users registered in each year:
2017: 172
2013: 155
2012: 176
2015: 172
2016: 155
2014: 152
2018: 18
Median for Number of Friends
6.0
Median Age of Users
35.0
Median of User Balance Amount
5207.28759
Mean for number of Unread messages for Active females
48.31349206349206
User87001 to 88000:
Number of users registered in each year:
2014: 160
2013: 165
2016: 169
2015: 

User112001 to 113000:
Number of users registered in each year:
2016: 179
2012: 149
2014: 150
2013: 137
2017: 188
2015: 163
2018: 34
Median for Number of Friends
6.0
Median Age of Users
35.0
Median of User Balance Amount
5404.27198
Mean for number of Unread messages for Active females
47.60698689956332
User113001 to 114000:
Number of users registered in each year:
2012: 171
2013: 156
2017: 169
2014: 156
2015: 146
2016: 170
2018: 32
Median for Number of Friends
6.0
Median Age of Users
35.0
Median of User Balance Amount
5266.57369
Mean for number of Unread messages for Active females
50.52651515151515
User114001 to 115000:
Number of users registered in each year:
2015: 168
2012: 145
2013: 174
2017: 159
2016: 149
2014: 180
2018: 25
Median for Number of Friends
5.0
Median Age of Users
35.0
Median of User Balance Amount
5245.86561
Mean for number of Unread messages for Active females
48.11969111969112
User115001 to 116000:
Number of users registered in each year:
2016: 146
2015: 172
2013: 16

User140001 to 141000:
Number of users registered in each year:
2013: 191
2014: 145
2018: 20
2017: 148
2016: 155
2015: 159
2012: 182
Median for Number of Friends
6.0
Median Age of Users
35.0
Median of User Balance Amount
5147.18405
Mean for number of Unread messages for Active females
50.83739837398374
User141001 to 142000:
Number of users registered in each year:
2012: 149
2016: 154
2013: 177
2015: 175
2014: 169
2017: 156
2018: 20
Median for Number of Friends
6.0
Median Age of Users
35.0
Median of User Balance Amount
5314.2301
Mean for number of Unread messages for Active females
52.95850622406639
User142001 to 143000:
Number of users registered in each year:
2017: 170
2015: 171
2014: 151
2012: 158
2013: 177
2016: 158
2018: 15
Median for Number of Friends
6.0
Median Age of Users
35.0
Median of User Balance Amount
5138.10467
Mean for number of Unread messages for Active females
48.982078853046595
User143001 to 144000:
Number of users registered in each year:
2013: 160
2014: 155
2015: 15

User168001 to 169000:
Number of users registered in each year:
2016: 171
2017: 158
2014: 154
2013: 171
2015: 161
2012: 170
2018: 15
Median for Number of Friends
5.0
Median Age of Users
34.0
Median of User Balance Amount
5249.7446
Mean for number of Unread messages for Active females
50.27235772357724
User169001 to 170000:
Number of users registered in each year:
2014: 149
2016: 168
2012: 167
2013: 155
2017: 180
2015: 157
2018: 24
Median for Number of Friends
5.0
Median Age of Users
35.0
Median of User Balance Amount
5460.35157
Mean for number of Unread messages for Active females
49.86206896551724
User170001 to 171000:
Number of users registered in each year:
2017: 176
2016: 187
2015: 158
2013: 168
2012: 153
2018: 16
2014: 142
Median for Number of Friends
5.0
Median Age of Users
34.0
Median of User Balance Amount
5203.70963
Mean for number of Unread messages for Active females
52.388663967611336
User171001 to 172000:
Number of users registered in each year:
2018: 22
2013: 178
2014: 168

User196001 to 197000:
Number of users registered in each year:
2014: 164
2015: 173
2012: 164
2016: 144
2013: 158
2017: 173
2018: 24
Median for Number of Friends
5.0
Median Age of Users
35.0
Median of User Balance Amount
5148.03451
Mean for number of Unread messages for Active females
50.253787878787875
User197001 to 198000:
Number of users registered in each year:
2015: 165
2012: 157
2016: 158
2014: 181
2013: 161
2017: 161
2018: 17
Median for Number of Friends
5.0
Median Age of Users
35.0
Median of User Balance Amount
5242.03748
Mean for number of Unread messages for Active females
47.95378151260504
User198001 to 199000:
Number of users registered in each year:
2015: 177
2017: 157
2012: 152
2014: 162
2016: 155
2013: 176
2018: 21
Median for Number of Friends
6.0
Median Age of Users
35.0
Median of User Balance Amount
5347.37975
Mean for number of Unread messages for Active females
47.376
User199001 to 200000:
Number of users registered in each year:
2014: 145
2012: 160
2017: 158
2015: 15