# Google Takeout
The purpose of this Jupyter notebook is to help discover the way the data in Google Takeout is stored.

In [1]:
from zipfile import ZipFile as zf

In [2]:
# Location of the downloaded Takeout archive. This is an absolute path on the
# author's machine, so point it at your own export before running the notebook.
file = r'C:\Users\lenny.meerwood\Downloads\takeout-20190628T012755Z-001.zip'

In [7]:
# Read the archive's table of contents; nothing is extracted to disk.
with zf(file) as zipped:
    infos = zipped.infolist()

# Collect the names of the folders that sit directly below the archive's
# root directory (one folder per Google product in the export).
folders = set()
for info in infos:
    parts = info.filename.split('/')
    # parts[0] is the archive root and parts[1] the entry directly below it.
    # That entry is a folder only if the path continues past it. Checking the
    # depth (rather than looking for '.html' in the name) skips every file
    # stored at the root, skips the root's own directory entry, and cannot
    # raise IndexError for an entry that has no '/' at all.
    if len(parts) > 2:
        folders.add(parts[1])

print(folders)

{'Android Device Configuration Service', 'Google Play Music', 'Home App', 'Google One', 'Fit', 'Google Shopping', 'Google Play Books', 'Cloud Print', 'Reminders', 'Fusion Tables', 'Google Pay', 'Tasks (Migration)', 'Voice', 'Google Play Store', 'Google+ Stream', 'Keep', 'Hangouts', 'YouTube', 'Maps (your places)', 'Purchases _ Reservations', 'G Suite Marketplace', 'News', 'Shopping Lists', 'Bookmarks', 'My Maps', 'Blogger', 'Profile', 'My Activity', 'Maps', 'Google Play Movies _ TV', 'Location History', 'Contacts', 'Chrome', 'Calendar', 'Google Play Games Services', 'Saved', 'Google My Business'}


In [None]:
import os, glob, json, time

In [None]:
# Remember the directory the notebook was started from; a later cell changes
# back to it before writing the CSV.
current_dir = os.getcwd()
print(current_dir)

# Switch into the extracted "Purchases _ Reservations" folder so the parsing
# cell can glob its JSON files with a relative pattern.
# NOTE: absolute path on the author's machine - adjust before running.
os.chdir(r'C:\Users\lenny.meerwood\Desktop\Takeout\Purchases _ Reservations')

In [None]:
def purchase_entry(data, sequence):
    """Build the (label, local timestamp) pair for one parsed purchase record.

    Parameters
    ----------
    data : dict
        Parsed contents of one purchase JSON file.
    sequence : int
        Running file number. It is appended to the label so that several
        purchases from the same merchant do not overwrite each other when the
        label is used as a dictionary key.

    Returns
    -------
    tuple of (str, str)
        The label and the creation time formatted as ``YYYY-MM-DD HH:MM:SS``
        in the local time zone of the machine running the notebook.

    Raises
    ------
    KeyError
        If ``creationTime`` or its ``usecSinceEpochUtc`` entry is missing.
    AttributeError
        If ``data`` or its ``transactionMerchant`` entry has no ``.get``
        (i.e. is not a dict).
    """
    merchant = data.get('transactionMerchant')
    if merchant is not None:
        label = merchant.get('name', "NO NAME")
    else:
        label = data.get('merchantOrderId', "NO MERCHANT NAME OR TRANSACTION ID")
    # The stamp is divided by one million to turn it into the seconds that
    # time.localtime() expects (the key name suggests microseconds).
    purchase_epoch = int(data['creationTime']['usecSinceEpochUtc']) / 1_000_000
    date_of_purchase = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(purchase_epoch))
    return f"{label} {sequence}", date_of_purchase


orders = {}        # label -> formatted purchase date
read_count = 0     # every *.json file found in the current directory
parsed_count = 0   # files that produced an entry in `orders`
skipped = []       # (file name, reason) for every file that did not

# sorted() makes the numeric suffix in the labels reproducible between runs;
# glob itself returns files in arbitrary order. The loop variable is not called
# `file`, so the archive path stored under that name is left alone.
for json_path in sorted(glob.glob("*.json")):
    read_count += 1
    try:
        # Read as UTF-8 explicitly. Without an encoding, open() falls back to
        # the platform default (cp1252 on Windows), which cannot decode
        # UTF-8 text containing non-ASCII characters.
        with open(json_path, encoding='utf-8') as json_file:
            data = json.load(json_file)
        merchant_name, date_of_purchase = purchase_entry(data, read_count)
    except UnicodeDecodeError as decode_err:
        skipped.append((json_path, f"not valid UTF-8: {decode_err}"))
    except json.JSONDecodeError as json_err:
        skipped.append((json_path, f"not valid JSON: {json_err}"))
    except KeyError as key_err:
        # Only the creationTime lookup can raise KeyError: no timestamp.
        skipped.append((json_path, f"no timestamp, missing key {key_err}"))
    except AttributeError as err:
        skipped.append((json_path, f"attribute error: {err}"))
        print(f"\n\n Skipped file {json_path}. Had an attribute error: {err}\n\n")
    else:
        orders[merchant_name] = date_of_purchase
        parsed_count += 1

print(f"Read {read_count} files, parsed {parsed_count}")
if skipped:
    # Skips are no longer silent; the details stay available for inspection.
    print(f"Skipped {len(skipped)} files - inspect `skipped` for the reasons")

In [None]:
import csv

In [None]:
# Return to the directory the notebook was started from so the report is
# written next to the notebook rather than inside the Takeout folder.
os.chdir(current_dir)

# newline='' is what the csv module requires of the file object, so the
# writer alone controls line endings: every row ends in a single '\n' on all
# platforms. encoding='utf-8' keeps merchant names with characters outside
# the platform default code page from raising UnicodeEncodeError.
with open('purchases.csv', 'w', newline='', encoding='utf-8') as output_file:
    csv_writer = csv.writer(output_file, lineterminator='\n')
    csv_writer.writerow(['Merchant ID', 'Date'])
    # One row per collected purchase: its label and its formatted date.
    for merchant_id, purchase_date in orders.items():
        csv_writer.writerow([merchant_id, purchase_date])
    