# Change Working Directory

In [1]:
import pandas as pd
import os

In [2]:
abspath = os.path.abspath('OO_Importing_Data.py') # Get filepath
dname = os.path.dirname(abspath) # Get directory
os.chdir(dname) # Make directory working directory

# EDA

## Reading in the Data

In [3]:
data = pd.read_json('assets/newdump.json')

## Splitting up the 'Channel Info' dictionaries into seperate columns

In [4]:
data['channel_type'] = [x['type'] for x in data['channel_info']]
data['channel'] = [x['channel'] for x in data['channel_info']]

In [5]:
data.drop('channel_info', axis = 1, inplace=True)

## Showing only Facebook and Instagram Data

In [6]:
FB_and_IG_data = data.loc[(data['channel'] == 'facebook') | (data['channel'] == 'instagram')]

## Removing '' and [] from 'type' column for queries

In [7]:
FB_and_IG_data['channel_type'] = FB_and_IG_data['channel_type'].apply(lambda x: x[0])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


## Breaking down number of entries for each type of post. Looks like Facebook is a clear winner

In [8]:
FB_and_IG_data['type'].value_counts()

facebook post    249485
instagram pic     17526
instagram vid      2664
Name: type, dtype: int64

## Replacing Values in 'brand' with the actual publication

In [9]:
FB_and_IG_data['brand'].value_counts()

137322    46545
137326    37444
137329    32608
137299    31680
137316    28308
137325    25983
137321    24165
137314    22100
137300    20842
Name: brand, dtype: int64

Found these by plugging urls into google and seeing what showed up
* Brand 137314 = Conde Naste Traveler
* Brand 137329 = W Magazine
* Brand 137321 = OnSelf Magazine
* Brand 137325 = Vanity Fair
* Brand 137300 = Clever
* Brand 137322 = Teen Vogue
* Brand 137299 = Allure
* Brand 137326 = Vogue
* Brand 137316 = Glamor

In [10]:
brands = {137314 : 'Conde_Naste_Traveler', 
          137329 : 'W_Magazine',
          137321 : 'Onself',
          137325 : 'Vanity_Fair', 
          137300 : 'Clever', 
          137322 : 'Teen_Vogue', 
          137299 : 'Allure', 
          137326 : 'Vogue',137316 : 'Glamor'
         }
FB_and_IG_data['brand'] = FB_and_IG_data['brand'].map(brands)                                                     

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.


## Taking a subset of just the Instagram data

In [11]:
instagram = FB_and_IG_data.loc[FB_and_IG_data['type'].isin(['instagram pic', 'instagram vid'])]

In [12]:
instagram = instagram.reset_index(drop=True)

## Turning post_id into urls

In [13]:
instagram['post_id'] = [x['post_id'] for x in instagram['content']]

In [14]:
instagram['urls'] = 'http://instdrive.com/p/' + instagram['post_id'].astype(str)

In [18]:
instagram.drop(['post_id'], axis=1, inplace=True)

In [19]:
instagram.head()

Unnamed: 0,brand,content,engagement,has_spend,id,impact,share_token,timestamp,type,urls,channel_type,channel
0,Vanity_Fair,"{'links': [], 'post_id': '1649041401268187216_...",22842,,MTM3MzI1LTE3MTM5MTc3X2luc3RhZ3JhbSBwaWNfMTc0ODM,0.916917,Rf-5cZ4Jd85T3ELj-vBlTwNQnqB4gu61mQ1EpB60sWzore...,2017-11-16 02:57:55,instagram pic,http://instdrive.com/p/1649041401268187216_112...,photo,instagram
1,Glamor,"{'links': [], 'post_id': '1649001509906326487_...",515,,MTM3MzE2LTE3MTMzNDMwX2luc3RhZ3JhbSBwaWNfMTc0ODM,0.104155,_jVh9Q5jkyQvc0vrcEvC3ANQnqB4gu61mQ1EpB60sWzore...,2017-11-16 01:38:40,instagram pic,http://instdrive.com/p/1649001509906326487_100...,photo,instagram
2,Allure,"{'links': [], 'post_id': '1648982356717300124_...",1128,,MTM3Mjk5LTE3MTM2NjM4X2luc3RhZ3JhbSBwaWNfMTc0ODM,0.373609,0QUMc4LpJEFQMVmFKCRD2wNQnqB4gu61mQ1EpB60sWzore...,2017-11-16 01:00:36,instagram pic,http://instdrive.com/p/1648982356717300124_248...,photo,instagram
3,Vanity_Fair,"{'links': [], 'post_id': '1648951132480942881_...",16709,,MTM3MzI1LTE3MTM5MTc4X2luc3RhZ3JhbSBwaWNfMTc0ODM,0.665746,O8uYi1okeMI9skZXMJXnkwNQnqB4gu61mQ1EpB60sWzore...,2017-11-15 23:58:34,instagram pic,http://instdrive.com/p/1648951132480942881_112...,photo,instagram
4,Clever,"{'links': [], 'post_id': '1648930367465254552_...",6545,,MTM3MzAwLTE3MTM2OTU0X2luc3RhZ3JhbSBwaWNfMTc0ODM,0.302599,m-LLj1UQCfKTd_aXjCJ-rwNQnqB4gu61mQ1EpB60sWzore...,2017-11-15 23:17:19,instagram pic,http://instdrive.com/p/1648930367465254552_239...,photo,instagram
