# Change Working Directory

In [1]:
import pandas as pd
import os

In [2]:
abspath = os.path.abspath('OO_Importing_Data.py') # Get filepath
dname = os.path.dirname(abspath) # Get directory
os.chdir(dname) # Make directory working directory

# EDA

## Reading in the Data

In [3]:
data = pd.read_json('assets/newdump.json')

## Splitting up the 'Channel Info' dictionaries into seperate columns

In [4]:
data['channel_type'] = [x['type'] for x in data['channel_info']]
data['channel'] = [x['channel'] for x in data['channel_info']]

In [5]:
data.drop('channel_info', axis = 1, inplace=True)

## Showing only Facebook and Instagram Data

In [6]:
FB_and_IG_data = data.loc[(data['channel'] == 'facebook') | (data['channel'] == 'instagram')]

## Removing '' and [] from 'type' column for queries

In [7]:
FB_and_IG_data['channel_type'] = FB_and_IG_data['channel_type'].apply(lambda x: x[0])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


## Breaking down number of entries for each type of post. Looks like Facebook is a clear winner

In [8]:
FB_and_IG_data['type'].value_counts()

facebook post    249485
instagram pic     17526
instagram vid      2664
Name: type, dtype: int64

## Replacing Values in 'brand' with the actual publication

In [9]:
FB_and_IG_data['brand'].value_counts()

137322    46545
137326    37444
137329    32608
137299    31680
137316    28308
137325    25983
137321    24165
137314    22100
137300    20842
Name: brand, dtype: int64

Found these by plugging urls into google and seeing what showed up
* Brand 137314 = Conde Naste Traveler
* Brand 137329 = W Magazine
* Brand 137321 = OnSelf Magazine
* Brand 137325 = Vanity Fair
* Brand 137300 = Clever
* Brand 137322 = Teen Vogue
* Brand 137299 = Allure
* Brand 137326 = Vogue
* Brand 137316 = Glamor

In [10]:
brands = {137314 : 'Conde_Naste_Traveler', 
          137329 : 'W_Magazine',
          137321 : 'Onself',
          137325 : 'Vanity_Fair', 
          137300 : 'Clever', 
          137322 : 'Teen_Vogue', 
          137299 : 'Allure', 
          137326 : 'Vogue',137316 : 'Glamor'
         }
FB_and_IG_data['brand'] = FB_and_IG_data['brand'].map(brands)                                                     

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.


## Taking a subset of just the Instagram data

In [11]:
instagram = FB_and_IG_data.loc[FB_and_IG_data['type'].isin(['instagram pic', 'instagram vid'])]

In [12]:
instagram = instagram.reset_index(drop=True)

## Getting data from the content column

In [56]:
instacontent = instagram['content']

In [160]:
sum([instacontent[i].keys() == instacontent[0].keys() for i in range(len(instacontent))])
#Seeing how many image entries we have.

17526

In [84]:
sum([instacontent[i].keys() != instacontent[0].keys() for i in range(len(instacontent))])
#Seeing how many video entries we have.

2664

## Splitting Instagram data into seperate image and video frames

In [97]:
instapics = instagram.loc[(instagram['type'] == 'instagram pic')]
instapics = instapics.reset_index(drop=True)

Unnamed: 0,brand,content,engagement,has_spend,id,impact,share_token,timestamp,type,urls,channel_type,channel
0,Vanity_Fair,"{'links': [], 'post_id': '1649041401268187216_...",22842,,MTM3MzI1LTE3MTM5MTc3X2luc3RhZ3JhbSBwaWNfMTc0ODM,0.916917,Rf-5cZ4Jd85T3ELj-vBlTwNQnqB4gu61mQ1EpB60sWzore...,2017-11-16 02:57:55,instagram pic,http://instdrive.com/p/1649041401268187216_112...,photo,instagram
1,Glamor,"{'links': [], 'post_id': '1649001509906326487_...",515,,MTM3MzE2LTE3MTMzNDMwX2luc3RhZ3JhbSBwaWNfMTc0ODM,0.104155,_jVh9Q5jkyQvc0vrcEvC3ANQnqB4gu61mQ1EpB60sWzore...,2017-11-16 01:38:40,instagram pic,http://instdrive.com/p/1649001509906326487_100...,photo,instagram
2,Allure,"{'links': [], 'post_id': '1648982356717300124_...",1128,,MTM3Mjk5LTE3MTM2NjM4X2luc3RhZ3JhbSBwaWNfMTc0ODM,0.373609,0QUMc4LpJEFQMVmFKCRD2wNQnqB4gu61mQ1EpB60sWzore...,2017-11-16 01:00:36,instagram pic,http://instdrive.com/p/1648982356717300124_248...,photo,instagram
3,Vanity_Fair,"{'links': [], 'post_id': '1648951132480942881_...",16709,,MTM3MzI1LTE3MTM5MTc4X2luc3RhZ3JhbSBwaWNfMTc0ODM,0.665746,O8uYi1okeMI9skZXMJXnkwNQnqB4gu61mQ1EpB60sWzore...,2017-11-15 23:58:34,instagram pic,http://instdrive.com/p/1648951132480942881_112...,photo,instagram
4,Clever,"{'links': [], 'post_id': '1648930367465254552_...",6545,,MTM3MzAwLTE3MTM2OTU0X2luc3RhZ3JhbSBwaWNfMTc0ODM,0.302599,m-LLj1UQCfKTd_aXjCJ-rwNQnqB4gu61mQ1EpB60sWzore...,2017-11-15 23:17:19,instagram pic,http://instdrive.com/p/1648930367465254552_239...,photo,instagram
5,Allure,"{'links': [], 'post_id': '1648906150550541915_...",776,,MTM3Mjk5LTE3MTM2NjM5X2luc3RhZ3JhbSBwaWNfMTc0ODM,0.256976,gOhdb8AbTqJX8fCLMwcUjwNQnqB4gu61mQ1EpB60sWzore...,2017-11-15 22:29:12,instagram pic,http://instdrive.com/p/1648906150550541915_248...,photo,instagram
6,Teen_Vogue,"{'links': [], 'post_id': '1648901053164193836_...",19198,,MTM3MzIyLTE3MTM2MDM1X2luc3RhZ3JhbSBwaWNfMTc0ODM,0.628635,DgrEc2i0AMzCbpjUisLdBQNQnqB4gu61mQ1EpB60sWzore...,2017-11-15 22:19:04,instagram pic,http://instdrive.com/p/1648901053164193836_407...,photo,instagram
7,W_Magazine,"{'links': ['wmag.com'], 'post_id': '1648897246...",8658,,MTM3MzI5LTE3MTM3NTYxX2luc3RhZ3JhbSBwaWNfMTc0ODM,0.609872,7skbmQRQ9ZdPaZpxRtV9cgNQnqB4gu61mQ1EpB60sWzore...,2017-11-15 22:11:31,instagram pic,http://instdrive.com/p/1648897246865187226_146...,photo,instagram
8,Vanity_Fair,"{'links': [], 'post_id': '1648838061670306552_...",23405,,MTM3MzI1LTE3MTI5Nzg3X2luc3RhZ3JhbSBwaWNfMTc0ODM,0.932539,mlVQpOh_AOeb4Zv5Xfh6JgNQnqB4gu61mQ1EpB60sWzore...,2017-11-15 20:13:55,instagram pic,http://instdrive.com/p/1648838061670306552_112...,photo,instagram
9,Vogue,"{'links': [], 'post_id': '1648827216232109257_...",14833,,MTM3MzI2LTE3MTMyMzA2X2luc3RhZ3JhbSBwaWNfMTc0ODM,0.225018,F-bgroQ4ng-l0m8IS568ZwNQnqB4gu61mQ1EpB60sWzore...,2017-11-15 19:52:22,instagram pic,http://instdrive.com/p/1648827216232109257_198...,photo,instagram


In [98]:
instavids = instagram.loc[(instagram['type'] == 'instagram vid')]
instavids = instavids.reset_index(drop=True)

Unnamed: 0,brand,content,engagement,has_spend,id,impact,share_token,timestamp,type,urls,channel_type,channel
0,Onself,"{'links': [], 'video_url': 'https://scontent.c...",104,,MTM3MzIxLTE3MzY1MDJfaW5zdGFncmFtIHZpZF8xNzQ4Mw,0.022920,bA4Sxohje20AVm5hmQmocX5b42_AaVjVFsyER-14Wf5mUM...,2017-11-15 22:27:58,instagram vid,http://instdrive.com/p/1648905426908873068_136...,video,instagram
1,Glamor,"{'links': [], 'video_url': 'https://scontent.c...",3860,,MTM3MzE2LTE3MzY2MTNfaW5zdGFncmFtIHZpZF8xNzQ4Mw,0.778408,qAUAGcSV00izhf9Z2k6V8H5b42_AaVjVFsyER-14Wf5mUM...,2017-11-15 20:52:13,instagram vid,http://instdrive.com/p/1648856742413281563_100...,video,instagram
2,Vogue,"{'links': [], 'video_url': 'https://scontent.c...",13755,,MTM3MzI2LTE3MzYzNzVfaW5zdGFncmFtIHZpZF8xNzQ4Mw,0.208665,kfPI1z8sTtT_kXgwo6XqpX5b42_AaVjVFsyER-14Wf5mUM...,2017-11-15 14:40:32,instagram vid,http://instdrive.com/p/1648670137240269255_198...,video,instagram
3,Teen_Vogue,"{'links': [], 'video_url': 'https://scontent.c...",28933,,MTM3MzIyLTE3MzU1NjlfaW5zdGFncmFtIHZpZF8xNzQ4Mw,0.947405,f9hg79ndCZoElujxXSaYSH5b42_AaVjVFsyER-14Wf5mUM...,2017-11-15 13:07:04,instagram vid,http://instdrive.com/p/1648623219564845734_407...,video,instagram
4,Glamor,"{'links': [], 'video_url': 'https://scontent.c...",1156,,MTM3MzE2LTE3MzQ5MzNfaW5zdGFncmFtIHZpZF8xNzQ4Mw,0.233119,d1sq1zo2mKHfEWOgRxFBeX5b42_AaVjVFsyER-14Wf5mUM...,2017-11-15 02:56:15,instagram vid,http://instdrive.com/p/1648315358530861537_100...,video,instagram
5,Glamor,"{'links': [], 'video_url': 'https://scontent.c...",985,,MTM3MzE2LTE3MzQ5MzRfaW5zdGFncmFtIHZpZF8xNzQ4Mw,0.198635,LLevAWDrlOj0TZ8AFmtwWH5b42_AaVjVFsyER-14Wf5mUM...,2017-11-15 01:53:05,instagram vid,http://instdrive.com/p/1648283945693667309_100...,video,instagram
6,Glamor,"{'links': [], 'video_url': 'https://scontent.c...",1396,,MTM3MzE2LTE3MzQ5MzVfaW5zdGFncmFtIHZpZF8xNzQ4Mw,0.281517,SKHmEq3h7BczNHjIM2KEQH5b42_AaVjVFsyER-14Wf5mUM...,2017-11-15 00:02:30,instagram vid,http://instdrive.com/p/1648227964720885266_100...,video,instagram
7,Glamor,"{'links': [], 'video_url': 'https://scontent.c...",1632,,MTM3MzE2LTE3MzQ5MzZfaW5zdGFncmFtIHZpZF8xNzQ4Mw,0.316174,0Lgx4eAfy25WVt5-g1ADln5b42_AaVjVFsyER-14Wf5mUM...,2017-11-14 23:31:48,instagram vid,http://instdrive.com/p/1648212717167585329_100...,video,instagram
8,Onself,"{'links': [], 'video_url': 'https://scontent.c...",13642,,MTM3MzIxLTE3MzQ4MTBfaW5zdGFncmFtIHZpZF8xNzQ4Mw,2.964285,m8dL01--44os6Fp_EOGbs35b42_AaVjVFsyER-14Wf5mUM...,2017-11-14 22:23:35,instagram vid,http://instdrive.com/p/1648178351633839879_136...,video,instagram
9,Vanity_Fair,"{'links': [], 'video_url': 'https://scontent.c...",3044,,MTM3MzI1LTE3MzQwODFfaW5zdGFncmFtIHZpZF8xNzQ4Mw,0.113712,n3O9Oi4mPujaGaT31y51hX5b42_AaVjVFsyER-14Wf5mUM...,2017-11-14 21:35:18,instagram vid,http://instdrive.com/p/1648154245859449517_112...,video,instagram


In [99]:
instapicscontent = instapics['content']

In [137]:
picframe = pd.DataFrame.from_dict(instapicscontent)

In [100]:
instavidscontent = instavids['content']

In [146]:
vidframe = pd.DataFrame.from_dict(instavidscontent)

In [147]:
picframe['caption']  = [x['caption'] for x in instapics['content']]
picframe['comment_count']  = [x['comment_count'] for x in instapics['content']]
picframe['filter_name'] = [x['filter_name'] for x in instapics['content']]
picframe['hashtags'] = [x['hashtags'] for x in instapics['content']]
picframe['image_url'] = [x['image_url'] for x in instapics['content']]
picframe['like_count'] = [x['like_count'] for x in instapics['content']]
picframe['link'] = [x['link'] for x in instapics['content']]
picframe['links'] = [x['links'] for x in instapics['content']]
picframe['post_id'] = [x['post_id'] for x in instapics['content']]

In [149]:
picframe.drop(['content'], axis = 1, inplace = True)
picframe = picframe.reset_index(drop=True)
picframe.head()

In [156]:
vidframe['caption'] = [x['caption'] for x in instavids['content']]
vidframe['comment_count'] = [x['comment_count'] for x in instavids['content']]
vidframe['filter_name'] = [x['filter_name'] for x in instavids['content']]
vidframe['hashtags'] = [x['hashtags'] for x in instavids['content']]
vidframe['image_url'] = [x['image_url'] for x in instavids['content']]
vidframe['like_count'] = [x['like_count'] for x in instavids['content']]
vidframe['link'] = [x['link'] for x in instavids['content']]
vidframe['links'] = [x['links'] for x in instavids['content']]
vidframe['post_id'] = [x['post_id'] for x in instavids['content']]
vidframe['video_url'] = [x['video_url'] for x in instavids['content']]

In [None]:
vidframe.drop(['content'], axis = 1, inplace = True)
vidframe = vidframe.reset_index(drop=True)
vidframe.head()