# Bots

### Combining

We're combining a lot of `.json` files into one large file of *Bot or Not* API calls. The file is structured like this... 

```
"botOrNot": [
        {
            "categories": {
                "content_classification": 0.65,
                "friend_classification": 0.38,
                "network_classification": 0.52,
                "sentiment_classification": 0.43,
                "temporal_classification": 0.8278391959798995,
                "user_classification": 0.63
            },
            "score": 0.56,
            "screen_name": "jCar89000",
            "user_id": "3532804813"
        },
        { ...
```

In [1]:
# Libraries
import glob
import json

import pandas as pd

In [2]:
# Directories
botDir = '../../data/processed/bots/'
userDir = '../../data/processed/users/update/'
outputDir = botDir + 'output/'

In [None]:
%ls 

In [3]:
# Crudely combine
process = []
for f in glob.glob((botDir + "*.json")):
    with open(f, "rb") as infile:
        process.append(json.load(infile))
        
        
# Examples
workingEx = process[1]['botOrNot'][0]
brokenEx = process[0]['botOrNot'][0]

print (workingEx)
print (brokenEx)

IndexError: list index out of range

In [4]:
print ('Results per file:')
for f in process:
    results = f['botOrNot']
    count = 0
    for i in results:
        count += 1
    print (count)

Results per file:
360
149
1081
1015
163
216
267
226
854
553
1087


In [5]:
records = []

for f in process:
    results = f['botOrNot']
    
    for result in results:
        try:
            if result['result']:
                continue
        except:
            pass
        
        try:
            score = result['score']
            userID = result['user_id']
            screenName  = result['screen_name']
            
            cats = result['categories']
            contentClass = cats['content_classification']
            tempClass = cats['temporal_classification']
            netClass = cats['network_classification']
            friendClass = cats['friend_classification']
            sentClass = cats['sentiment_classification']
            userClass = cats['user_classification']
            
            records.append({'userID':userID, 'screenName':screenName, 'score':score,
                    'contentClass':contentClass,
                     'tempClass': tempClass,
                     'netClass': netClass,
                     'friendClass': friendClass,
                    'sentClass': sentClass,
                    'userClass': userClass})
            
            
        except Exception as e:
            print (e)

In [6]:
bots = pd.DataFrame.from_records(records)
bots.head()

Unnamed: 0,contentClass,friendClass,netClass,score,screenName,sentClass,tempClass,userClass,userID
0,0.65,0.38,0.52,0.56,jCar89000,0.43,0.827839,0.63,3532804813
1,0.88,0.793147,0.397822,0.57,dtrumptvfan,0.82,0.79,0.16,3920277023
2,0.67,0.75,0.888,0.49,LrBlancoo,0.53,0.41,0.34,717358485030244352
3,0.76,0.52,0.61,0.64,DavidAvritt,0.76,0.37,0.53,15195038
4,0.56,0.58,0.42,0.48,CarolHello1,0.49,0.37,0.41,525641456


### Users

Grab the filtered users we sent out to *Bot or Not*.

In [8]:
users = pd.read_csv((userDir + 'filtered-users.csv'), dtype={'userID': 'object'})
users.head()

Unnamed: 0,userID,streamTweets,accountDuration,numDays,allTweets,followers,following
0,3532804813,21298,227,32,179477,354,7
1,3920277023,10727,199,24,92391,5,0
2,717358485030244352,6541,181,17,47580,96,67
3,15195038,5814,3025,57,627202,2098,956
4,525641456,5477,1659,48,134010,3755,4121


In [12]:
# join the tables
output = pd.merge(bots, users, on=['userID', 'userID'])
# Add the bot classification from Bot or Not's overall score
output['bot'] = output['score'].apply(lambda x: 1 if (x > 0.5) else 0)
output.head()

Unnamed: 0,contentClass,friendClass,netClass,score,screenName,sentClass,tempClass,userClass,userID,streamTweets,accountDuration,numDays,allTweets,followers,following,bot
0,0.65,0.38,0.52,0.56,jCar89000,0.43,0.827839,0.63,3532804813,21298,227,32,179477,354,7,1
1,0.88,0.793147,0.397822,0.57,dtrumptvfan,0.82,0.79,0.16,3920277023,10727,199,24,92391,5,0,1
2,0.67,0.75,0.888,0.49,LrBlancoo,0.53,0.41,0.34,717358485030244352,6541,181,17,47580,96,67,0
3,0.76,0.52,0.61,0.64,DavidAvritt,0.76,0.37,0.53,15195038,5814,3025,57,627202,2098,956,1
4,0.56,0.58,0.42,0.48,CarolHello1,0.49,0.37,0.41,525641456,5477,1659,48,134010,3755,4121,0


In [43]:
output.to_csv((outputDir + "merged-bots.csv"), index=False)

In [16]:
# only the confirmed bots, with filtered columns
output[output['bot'] == 1][['userID', 'screenName']].to_csv(\
                    (outputDir + "bot-ids.csv"), index=False)