# Operators and Expressions in Python

## Libraries and settings

In [None]:
# Libraries
import warnings
import os
import shutil
import json
import numpy as np
import pandas as pd
from tabulate import tabulate

# API credentials for Kaggle
# Read the username/key pair from the local JSON file. The encoding is given
# explicitly so the result does not depend on the platform's default locale
# encoding (JSON text is UTF-8).
with open('./data/kaggle.json', encoding='utf-8') as f:
    data = json.load(f)

# Expose the credentials as environment variables. This happens before the
# kaggle package is imported further down in this cell.
# NOTE(review): presumably the Kaggle client reads these variables when it
# authenticates - confirm against the Kaggle API documentation.
os.environ['KAGGLE_USERNAME'] = data['username']
os.environ['KAGGLE_KEY'] = data['key']

from kaggle.api.kaggle_api_extended import KaggleApi

# Silence warnings of every category for the rest of the session
# (this also hides deprecation notices raised by the libraries used below)
warnings.simplefilter('ignore')

# Print the working directory, i.e. the folder the relative './data' paths
# in this notebook are resolved against
print(os.getcwd())

## Initialize the Kaggle API

In [None]:
# Initialize API
# Create the Kaggle API client object; it is used further down to download
# the dataset file.
api = KaggleApi()
# Authenticate the client before any request is made.
# NOTE(review): presumably this picks up the KAGGLE_USERNAME / KAGGLE_KEY
# environment variables set in the first cell - confirm against the Kaggle docs.
api.authenticate()

## Get data from Kaggle via Kaggle's Web API

In [None]:
# Fetch the single file 'BigmacPrice.csv' of the Kaggle dataset into ./data
api.dataset_download_file(
    'vittoriogiatti/bigmacprice', 'BigmacPrice.csv', path='./data'
)

# Load the downloaded CSV into a pandas data frame (a comma is already
# read_csv's default separator) and display it as the cell output
df = pd.read_csv('./data/BigmacPrice.csv')
df

## Use Python operators and expressions to create subsets of the data

### Subset based on date & dollar price

In [None]:
# Lower-priced countries: rows dated 2022-07-01 with a dollar price of at most 2.00
df_sub_01 = df.loc[
    (df['date'] == '2022-07-01') & (df['dollar_price'] <= 2.00)
]
print('\nCountries with lower Big Mac prices\n')
print(tabulate(df_sub_01, headers=df_sub_01.columns.tolist()))

# Higher-priced countries: rows dated 2022-07-01 with a dollar price of at least 6.00
df_sub_02 = df.loc[
    (df['date'] == '2022-07-01') & (df['dollar_price'] >= 6.00)
]
print('\nCountries with higher Big Mac prices\n')
print(tabulate(df_sub_02, headers=df_sub_02.columns.tolist()))

### Subset based on country

In [None]:
# Keep only the rows for Venezuela; the development of 'local_price' in
# these rows indicates the inflation. Show the first five rows.
df_sub_03 = df.loc[df['name'] == 'Venezuela']
df_sub_03.head(5)

### Subset based on multiple variables and operators

In [None]:
# Operators used below: == (is equal to), >= (greater than or equal to),
# & (and), | (or). Every comparison is wrapped in parentheses because
# & and | bind more tightly than the comparison operators.
df_sub_04 = df.loc[
    (df['date'] == '2022-07-01')
    & ((df['name'] == 'Switzerland') | (df['dollar_price'] >= 6.80))
]
df_sub_04

## Use Python operators and expressions to create new variables

### Create a new variable 'dollar_price_category' based on the dollar price


In [None]:
# pandas' apply() method calls the lambda function once per 'dollar_price'
# value (we will come back to this later in the module). Prices of 5.00 or
# more are labelled 'higher price', everything else 'lower price'.
df['dollar_price_category'] = df['dollar_price'].apply(
    lambda price: 'higher price' if price >= 5.00 else 'lower price'
)
df

### Create a pivot table with the new variable 'dollar_price_category'  

In [None]:
# Subset only the latest prices
df_sub_05 = df.loc[df['date'] == '2022-07-01']

# Create pivot table with mean prices, grouped by price category and country.
# The aggregation is given as the string 'mean' instead of np.mean: recent
# pandas versions emit a FutureWarning when a NumPy callable is passed here
# and recommend the string to keep the current behavior.
table = pd.pivot_table(df_sub_05[['name', 'dollar_price', 'dollar_price_category']],
                       index=['dollar_price_category', 'name'],
                       values=['dollar_price'],
                       aggfunc='mean')
print(table)

### Jupyter notebook --footer info-- (please always provide this at the end of each notebook)

In [None]:
import os
import platform
import socket
from platform import python_version
from datetime import datetime

# Footer: report the OS name, platform and release, the current local
# timestamp and the running Python version, framed by separator lines
print('-----------------------------------')
print(os.name.upper())
print(platform.system(), '|', platform.release())
print('Datetime:', format(datetime.now(), '%Y-%m-%d %H:%M:%S'))
print('Python Version:', platform.python_version())
print('-----------------------------------')