# Operators and Expressions in Python

## Libraries and settings

In [8]:
# Libraries
import os
import shutil
import json
import numpy as np
import pandas as pd
from tabulate import tabulate

# API credentials for Kaggle
with open('kaggle.json') as f:
    data = json.load(f)

os.environ['KAGGLE_USERNAME'] = data['username']
os.environ['KAGGLE_KEY'] = data['key']

from kaggle.api.kaggle_api_extended import KaggleApi

# Set up Kaggle API on GitHub Codespaces
# source_file = os.path.join(os.getcwd(), 'kaggle.json')
# destination_dir = os.path.join(os.getcwd(), '/home/vscode/.kaggle')
# shutil.copy(source_file, destination_dir)
# file_path = '/home/vscode/.kaggle/kaggle.json'
# os.chmod(file_path, 0o600)

# Ignore warnings
import warnings
warnings.filterwarnings('ignore')

# Show current working directory
print(os.getcwd())


FileNotFoundError: [Errno 2] No such file or directory: 'kaggle.json'

## Get data from Kaggle via Kaggle's Web API

In [6]:
# Initialize API
api = KaggleApi()
api.authenticate()

# Download file
api.dataset_download_file('vittoriogiatti/bigmacprice','BigmacPrice.csv')

# Read data to pandas data frame
df = pd.read_csv('BigmacPrice.csv', sep=',')
df

ApiException: (401)
Reason: Unauthorized
HTTP response headers: HTTPHeaderDict({'Content-Type': 'application/json', 'Date': 'Fri, 01 Mar 2024 15:16:44 GMT', 'Access-Control-Allow-Credentials': 'true', 'Access-Control-Allow-Origin': '*', 'Set-Cookie': 'ka_sessionid=fa5ec0ab1232d86a7f8dd87929f82232; max-age=2626560; path=/, GCLB=CL3m3sui4qLgdg; path=/; HttpOnly', 'Vary': 'Accept-Encoding', 'Turbolinks-Location': 'https://www.kaggle.com/api/v1/datasets/download/vittoriogiatti/bigmacprice/BigmacPrice.csv', 'X-Kaggle-MillisecondsElapsed': '6', 'X-Kaggle-RequestId': 'bcc64174300dd04c8ffcaac96506d1c6', 'X-Kaggle-ApiVersion': '1.6.7', 'X-Kaggle-HubVersion': '0.1.9', 'X-Frame-Options': 'SAMEORIGIN', 'Strict-Transport-Security': 'max-age=63072000; includeSubDomains; preload', 'Content-Security-Policy': "object-src 'none'; script-src 'nonce-iSnRtL6E1UJwjOcyqxNnkg==' 'report-sample' 'unsafe-inline' 'unsafe-eval' 'strict-dynamic' https: http:; base-uri 'none'; report-uri https://csp.withgoogle.com/csp/kaggle/20201130; frame-src 'self' https://www.kaggleusercontent.com https://www.youtube.com/embed/ https://polygraph-cool.github.io https://www.google.com/recaptcha/ https://www.docdroid.com https://www.docdroid.net https://kaggle-static.storage.googleapis.com https://kkb-production.jupyter-proxy.kaggle.net https://kkb-production.firebaseapp.com https://kaggle-metastore.firebaseapp.com https://apis.google.com https://content-sheets.googleapis.com/ https://accounts.google.com/ https://storage.googleapis.com https://docs.google.com https://drive.google.com https://calendar.google.com/;", 'X-Content-Type-Options': 'nosniff', 'Referrer-Policy': 'strict-origin-when-cross-origin', 'Via': '1.1 google', 'Alt-Svc': 'h3=":443"; ma=2592000,h3-29=":443"; ma=2592000', 'Transfer-Encoding': 'chunked'})
HTTP response body: b'{"code":401,"message":"Unauthenticated"}'


## Use Python operators and expressions to create subsets of the data

### Subset based on date & dollar price

In [None]:
# Countries with lower Big Mac prices
df_sub_01 = df.loc[(df['date'] == '2022-07-01') & (df['dollar_price'] <= 2.00)]
print('\nCountries with lower Big Mac prices\n')
print(tabulate(df_sub_01, headers=list(df_sub_01.columns)))

# Countries with higher Big Mac prices
df_sub_02 = df.loc[(df['date'] == '2022-07-01') & (df['dollar_price'] >= 6.00)]
print('\nCountries with higher Big Mac prices\n',)
print(tabulate(df_sub_02, headers=list(df_sub_02.columns)))

### Subset based on country

In [None]:
# Look at the inflation indicated by the development of 'local_price'
df_sub_03 = df.loc[(df['name'] == 'Venezuela')]
df_sub_03.head()

### Subset based on multiple variables and operators

In [None]:
# Note the operators here: == (is equal to), & (and), | (OR)
df_sub_04 = df.loc[(df['date'] == '2022-07-01') & ((df['name'] == 'Switzerland') | (df['dollar_price'] >= 6.80))]
df_sub_04

## Use Python operators and expressions to create new variables

### Create a new variable 'dollar_price_category' based on the dollar price


In [None]:
# Here, pandas apply() method is used in combination with the lambda function (we will come back to this later in the module)
df['dollar_price_category'] = df['dollar_price'].apply(lambda x: 'higher price' if x >= 5.00 else 'lower price')
df

### Create a pivot table with the new variable 'dollar_price_category'  

In [None]:
# Subset only the lates prices
df_sub_05 = df.loc[(df['date'] == '2022-07-01')]

# Create pivot table with mean prices
table = pd.pivot_table(df_sub_05[['name', 'dollar_price', 'dollar_price_category']],
                       index=['dollar_price_category', 'name'],
                       values=['dollar_price'],
                       aggfunc=np.mean)
print(table)

### Jupyter notebook --footer info-- (please always provide this at the end of each notebook)

In [None]:
import os
import platform
import socket
from platform import python_version
from datetime import datetime

print('-----------------------------------')
print(os.name.upper())
print(platform.system(), '|', platform.release())
print('Datetime:', datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
print('Python Version:', python_version())
print('-----------------------------------')