# File script

In [1]:
import os

get_raw_data_script_file = os.path.join(os.path.pardir, 'src', 'data', 'get_raw_data.py')

In [6]:
%%writefile $get_raw_data_script_file

import requests
from requests import session
import os
from dotenv import load_dotenv, find_dotenv
import logging

payload = {
    "action": "login",
    "username": os.environ.get("KAGGLE_USERNAME"),
    "password": os.environ.get("KAGGLE_PASSWORD")
}

login_url = "https://www.kaggle.com/account/login"

def extract_data(file_path, url):

    with session() as s:
        s.post(login_url, data=payload)
        
        with open(file_path, "w") as handle:
            response = s.get(url, stream=True)
            for block in response.iter_content(1024):
                print block
                handle.write(block)

def main(project_dir):
    logger = logging.getLogger(__name__)
    logger.info("Getting raw data")
    
    training_data_url = "https://www.kaggle.com/c/titanic/download/train.csv"
    test_data_url = "https://www.kaggle.com/c/titanic/download/test.csv"
    
    raw_data_path = os.path.join(project_dir, 'data', 'raw')
    train_data_path = os.path.join(raw_data_path, 'train.csv')
    test_data_path = os.path.join(raw_data_path, 'test.csv')
        
    extract_data(train_data_path, training_data_url)
    extract_data(test_data_path, test_data_url)
    
    logger.info("Downloaded raw data")
    
    
if __name__ == "__main__":
    project_dir = os.path.join(os.path.dirname(__file__), os.path.pardir, os.path.pardir)
    log_fmt = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
    logging.basicConfig(level=logging.INFO, format=log_fmt)
    dotenv_path = find_dotenv()
    load_dotenv(dotenv_path)
    main(project_dir)

Overwriting ../src/data/get_raw_data.py


In [7]:
! python $get_raw_data_script_file

2018-01-14 14:33:24,811 - __main__ - INFO - Getting raw data
<!DOCTYPE html>
<html>
<head>
    <title>Kaggle: Your Home for Data Science</title>
    <meta charset="utf-8" />
    <meta name="robots" content="index, follow"/>
    <meta name="theme-color" content="#008ABC" />
    <link rel="dns-prefetch" href="https://www.google-analytics.com" /><link rel="dns-prefetch" href="https://stats.g.doubleclick.net" /><link rel="dns-prefetch" href="https://js.intercomcdn.com" /><link rel="preload" href="https://az416426.vo.msecnd.net/scripts/a/ai.0.js" as=script /><link rel="dns-prefetch" href="https://kaggle2.blob.core.windows.net" />
    <link href="/content/v/d420a040e581/kaggle/favicon.ico" rel="shortcut icon" type="image/x-icon" />
    <link rel="manifest" href="/static/json/manifest.json">
    <link href="//fonts.googleapis.com/css?family=Open+Sans:400,300,300italic,400italic,600,600italic,700,700italic" rel='stylesheet' type='text/css'>
                    <link rel="stylesheet" type="text