# Advanced programming techniques in Python

## Libraries and settings

In [1]:
# Libraries
import os
import re
import numpy as np
import pandas as pd
from requests import get
from functools import reduce
from bs4 import BeautifulSoup

# Suppress every warning for the rest of the session
import warnings
warnings.filterwarnings(action='ignore')

# Print the directory the kernel is currently running in
current_dir = os.getcwd()
print(current_dir)

/Users/sivanujanselvarajah/Documents/zhaw/4.Semester/Scientific programming/scientific_programming/Week_10/exercises


## Lambda

In [2]:
# Variant 1: a named function defined with def
def add_xy(x, y):
    """Return the result of x + y."""
    total = x + y
    return total

print(add_xy(42, 42))

# Variant 2: the same operation as an anonymous (lambda) function.
# The assignment rebinds the name add_xy to the lambda.
add_xy = lambda x, y: x + y
print(add_xy(41, 1))

84
42


In [4]:
# Variant 1: a named function defined with def
def add_xy(x, y):
    """Return the result of x + y."""
    total = x + y
    return total

print(add_xy(42, 42))

# Variant 2: two-argument lambda bound to the same name
add_xy = lambda x, y: x + y
print(add_xy(20, 30))

# Variant 3: three-argument lambda
var = lambda a, b, c: a + b + c
print(var(20, 20, 2))

84
50
42


In [4]:
# Build a small demo frame: seven labelled rows ('a'..'g') and three numeric fields
df = pd.DataFrame(
    data=[
        [1.5, 2.5, 10.0],
        [2.0, 4.5, 5.0],
        [2.5, 5.2, 8.0],
        [4.5, 5.8, 4.8],
        [4.0, 6.3, 70],
        [4.1, 6.4, 9.0],
        [5.1, 2.3, 11.1],
    ],
    columns=['Field_1', 'Field_2', 'Field_3'],
    index=list('abcdefg'),
)

# Row-wise apply (axis=1): square the rows labelled 'b' and 'f', pass all other rows through
df = df.apply(lambda row: np.square(row) if row.name in ('b', 'f') else row, axis=1)

# Add a 'Product' column holding the product of the three fields of each row
df = df.assign(Product=lambda frame: frame['Field_1'] * frame['Field_2'] * frame['Field_3'])
df

Unnamed: 0,Field_1,Field_2,Field_3,Product
a,1.5,2.5,10.0,37.5
b,4.0,20.25,25.0,2025.0
c,2.5,5.2,8.0,104.0
d,4.5,5.8,4.8,125.28
e,4.0,6.3,70.0,1764.0
f,16.81,40.96,81.0,55771.5456
g,5.1,2.3,11.1,130.203


## Map

In [5]:
# Input values and an empty list for the results
numbers = [1, 2, 3, 4, 5]
squared = []

# Regular Function
def square(number):
    """Return number raised to the power of two."""
    return number ** 2

# Variant 1: explicit for loop that collects one square per element
for num in numbers:
    squared += [num ** 2]

print(squared)

# Variant 2: map() applies square() to every element; the map object is
# lazy, so it is materialised with list() for printing
squared = map(square, numbers)
print(list(squared))

[1, 4, 9, 16, 25]
[1, 4, 9, 16, 25]


In [6]:
numbers = [-2, -1, 0, 1, 2]

# map() with the built-in abs(): absolute value of every element
abs_values = [*map(abs, numbers)]
print(abs_values)

# map() with a type constructor: convert every element to float
print([*map(float, numbers)])

# map() with the built-in len(): length of every string
words = ["Welcome", "to", "Real", "Python"]
print([*map(len, words)])

[2, 1, 0, 1, 2]
[-2.0, -1.0, 0.0, 1.0, 2.0]
[7, 2, 4, 6]


In [7]:
# map() with an inline lambda instead of a named function
numbers = list(range(1, 6))
squared = map(lambda value: value * value, numbers)

print(list(squared))

[1, 4, 9, 16, 25]


## Filter

In [1]:
numbers = [-2, -1, 0, 1, 2]

# Filtering using a regular function
def extract_pos(numbers):
    """Return a new list with the elements of numbers that are greater than zero."""
    kept = []
    for value in numbers:
        # Skip everything that fails the filtering condition
        if not value > 0:
            continue
        kept.append(value)
    return kept

print(extract_pos(numbers))

# Same result, expressed with filter() and a lambda predicate
pos_numbers = filter(lambda n: n > 0, numbers)
print(list(pos_numbers))

[1, 2]
[1, 2]


## Reduce

In [9]:
# The integers 1..20
numbers = list(range(1, 21))
print(numbers, "\n")

# Addition that also prints each intermediate step
def my_add(a, b):
    """Print the addition "a + b = result" and return the sum of a and b."""
    result = a + b
    print(f"{a} + {b} = {result}")
    return result

# reduce() folds the list from left to right: the result of each call
# becomes the first argument of the next call
reduce(my_add, numbers)

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20] 

1 + 2 = 3
3 + 3 = 6
6 + 4 = 10
10 + 5 = 15
15 + 6 = 21
21 + 7 = 28
28 + 8 = 36
36 + 9 = 45
45 + 10 = 55
55 + 11 = 66
66 + 12 = 78
78 + 13 = 91
91 + 14 = 105
105 + 15 = 120
120 + 16 = 136
136 + 17 = 153
153 + 18 = 171
171 + 19 = 190
190 + 20 = 210


210

In [10]:
# Sum only the even elements of an iterable
def sum_even(it):
    """Return the sum of the even elements of it (0 if there are none)."""
    def add_if_even(acc, item):
        # Odd items leave the running total unchanged
        if item % 2:
            return acc
        return acc + item

    # The initializer 0 is the starting total
    return reduce(add_if_even, it, 0)

# Call the function
print(sum_even([1, 2, 3, 4, 5]))
print(sum_even(list(range(100))))
print(sum_even(list(range(1, 100, 2))))
print(sum_even(list(range(1, 100, 3))))

6
2450
0
784


In [11]:
from functools import reduce

numbers = [1, 2, 3, 4, 5]

# No initializer: folding starts with the first two elements
result = reduce(lambda acc, item: acc + item, numbers)
print(result) # Output: 15

# Initializer 10: folding starts with 10 and the first element
result = reduce(lambda acc, item: acc + item, numbers, 10)
print(result) # Output: 25

15
25


## List comprehension

In [12]:
original_list = list(range(1, 11))

# List comprehension: keep the even numbers and square each of them
even_numbers = [num * num for num in original_list if not num % 2]

print(even_numbers)


[4, 16, 36, 64, 100]


In [13]:
original_list = list(range(1, 11))

# List comprehension that calls an inline lambda to square each even number
squares_of_even_numbers = [(lambda value: value * value)(item) for item in original_list if not item % 2]

print(squares_of_even_numbers)

[4, 16, 36, 64, 100]


## Dictionary comprehension

In [14]:
people = {'Alice': 25, 'Bob': 35, 'Charlie': 40, 'David': 30}

# Dictionary comprehension: map every name to the length of that name
names_and_lengths = {person: len(person) for person in people}

print(names_and_lengths)

# Same mapping, restricted to people whose age is above 30
names_and_lengths = {person: len(person) for person, years in people.items() if years > 30}

print(names_and_lengths)

{'Alice': 5, 'Bob': 3, 'Charlie': 7, 'David': 5}
{'Bob': 3, 'Charlie': 7}


In [15]:
people = {'Alice': 25, 'Bob': 35, 'Charlie': 40, 'David': 30}

# Dictionary comprehension that calls an inline lambda to compute each name's length
names_and_lengths = {person: (lambda s: len(s))(person) for person in people}

print(names_and_lengths)

# ... same as above but with an if condition on the age
names_and_lengths = {person: (lambda s: len(s))(person) for person, years in people.items() if years > 30}

print(names_and_lengths)


{'Alice': 5, 'Bob': 3, 'Charlie': 7, 'David': 5}
{'Bob': 3, 'Charlie': 7}


## Exception handling

In [16]:
# Because x is not defined, print(x) raises a NameError
# print(x)

# Try and except block to handle the exception.
# Catch NameError specifically: a bare "except:" would also swallow
# KeyboardInterrupt and SystemExit and hide unrelated bugs.
try:
    print(x)
except NameError:
    print("An exception occurred!")

# Try and except block with pass (no output)
try:
    print(x)
except NameError:
    pass

An exception occurred!


In [17]:
# Import module sys to get the type of exception
import sys

randomList = ['a', 0, 2]
for entry in randomList:
    try:
        print("The entry is", entry)
        r = 1/int(entry)
    # Only the errors this conversion/division can raise are handled;
    # anything else (e.g. KeyboardInterrupt) still propagates.
    except (TypeError, ValueError, ZeroDivisionError):
        # sys.exc_info()[0] is the class of the exception being handled
        print("Oops!", sys.exc_info()[0], "occurred.")
        print("Next entry.")
        print()
    else:
        # Reached only when no exception occurred, so r is guaranteed to be set
        print("The reciprocal of", entry, "is", r)
        break
else:
    # The loop finished without break: no entry could be inverted
    print("No entry in the list has a reciprocal.")

The entry is a
Oops! <class 'ValueError'> occurred.
Next entry.

The entry is 0
Oops! <class 'ZeroDivisionError'> occurred.
Next entry.

The entry is 2
The reciprocal of 2 is 0.5


In [18]:
# Using try and finally
# The file is opened *before* the try block: if open() itself fails there is
# no handle to close, and the original error is reported unchanged instead of
# being masked by a NameError from f.close().
f = open("test.txt", encoding='utf-8')
try:
    # Print text from file
    print(f.read())
finally:
    # Runs whether or not reading succeeded, so the handle is always released
    f.close()

Dorothy lived in the midst of the great Kansas prairies, with Uncle Henry, who was a farmer, and Aunt Em, who was the farmer’s wife. Their house was small, for the lumber to build it had to be carried by wagon many miles. There were four walls, a floor and a roof, which made one room; and this room contained a rusty looking cookstove, a cupboard for the dishes, a table, three or four chairs, and the beds. Uncle Henry and Aunt Em had a big bed in one corner, and Dorothy a little bed in another corner. There was no garret at all, and no cellar—except a small hole dug in the ground, called a cyclone cellar, where the family could go in case one of those great whirlwinds arose, mighty enough to crush any building in its path. It was reached by a trap door in the middle of the floor, from which a ladder led down into the small, dark hole.


## Regular expressions

Before using a regex in a Python program, test it with an additional tool, e.g. https://regex101.com

In [19]:
# String
string = 'Hello 12! Hi 89! Howdy 34!'

# Using findall() from the 're' library to extract all numbers.
# The pattern is a raw string so that \d reaches the regex engine as-is;
# in a normal string literal "\d" is an invalid escape sequence.
result = re.findall(r'\d+', string)
print(result)

# Use map to convert strings to numerical values
result_final = list(map(int, result))
print(result_final)

['12', '89', '34']
[12, 89, 34]


In [20]:
# Example string with email addresses.
# Named "text" rather than "str" so the built-in str type is not shadowed
# for the rest of the session.
text = '''a string with peter.meier@gmx.com 
         some emails mary.mueller@gmail.com 
         inside and some more emails
         urs.ursin@zhaw.ch and one additional  
         email anne.peter@eth-zurich.ch'''

# re.findall() returns email strings: one or more word characters, dots or
# hyphens on each side of an "@"
emails = re.findall(r'[\w\.-]+@[\w\.-]+', text)
for email in emails:
    print(email)

peter.meier@gmx.com
mary.mueller@gmail.com
urs.ursin@zhaw.ch
anne.peter@eth-zurich.ch


#### Practical example: Get all email addresses from a web page

In [3]:
# Practical example based on content from the AGVS website
# The timeout keeps the cell from hanging on a stalled connection, and
# raise_for_status() stops on an HTTP error instead of parsing an error page.
response = get('https://www.agvs-upsa.ch/de/verband/mitgliederverzeichnis/liste', timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.content, "html.parser")
# Iterate over a snapshot of the children: decompose() removes the node from
# soup.body, and removing items from the sequence being iterated can skip siblings.
for child in list(soup.body.children):
    if child.name == 'script':
        child.decompose()
# Text of the body without the removed top-level <script> elements
txt = soup.body.get_text()

# Collect every email-like substring of a text
def email_extract(txt):
    """Return the list of all matches of the email pattern in txt, in order of appearance."""
    return re.findall(r'[\w\.-]+@[\w\.-]+', txt)

# Call the function on the page text held in txt; the bare expression on the
# last line makes the notebook display the resulting list
my_emails = email_extract(txt)
my_emails

['ebikon@emilfrey.ch',
 'info@garagestock.ch',
 'info@sportgarage-pachler.ch',
 'info@1a-autoservice.ch',
 'office@2mmotorsport.biz',
 'e58act@gmail.com',
 'mujdinarifi@msn.com',
 'info@garagemaeder.com',
 'abiszautocenter@gmail.com',
 'info@ammotorsport.ch',
 'garage_a.sokoli@hotmail.ch',
 'info@garage-bizzozero.ch',
 'shop@a-buergi.ch',
 'info@garage-doerig.ch',
 'a.gruetter@garage-gruetter.ch',
 'info@talackergarage.ch',
 'info@ameyerag.ch',
 'sekretariat@a-reinhard.ch',
 'garage.schoeni@bluewin.ch',
 'info@a1autoservice.ch',
 'info@aautilitaire.ch',
 'a.idrizi@aare-automobile.ch',
 'info@aare-touring.ch',
 'iwan.mueller@aaretal-garage.com',
 'info@ad-aaretal.ch',
 'aaz_autohaus@bluewin.ch',
 'rechnung@abag.ch',
 'stefan.rolli@abag.ch',
 'info@abc-mecanique.ch',
 'info@abcgarage-davos.ch',
 'info@abpgarage.ch',
 'info@abs-autoservice.ch',
 'info@abs-autoservice.ch',
 'info.liestal@abtautomobile.ch',
 'info.muttenz@abtautomobile.ch',
 'info.reinach@abtautomobile.ch',
 'm.steiner@alu-

### Jupyter notebook --footer info-- (please always provide this at the end of each notebook)

In [4]:
import os
import platform
import socket
from platform import python_version
from datetime import datetime

# Footer: OS family, platform and release, current timestamp, Python version
rule = '-----------------------------------'
timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

print(rule)
print(os.name.upper())
print(f"{platform.system()} | {platform.release()}")
print('Datetime:', timestamp)
print('Python Version:', python_version())
print(rule)

-----------------------------------
POSIX
Darwin | 23.3.0
Datetime: 2024-06-05 14:11:06
Python Version: 3.10.13
-----------------------------------


In [32]:
import os
import re
import pandas as pd
import json

# API credentials for Kaggle: read from a local JSON file (keys 'username'
# and 'key') and exported as environment variables
with open('kaggle.json') as f:
    data = json.load(f)

os.environ['KAGGLE_USERNAME'] = data['username']
os.environ['KAGGLE_KEY'] = data['key']
# NOTE(review): this import is placed after the environment variables are set,
# presumably because the kaggle package reads them on import - confirm before moving it
from kaggle.api.kaggle_api_extended import KaggleApi

# Initialize API
api = KaggleApi()
api.authenticate()

# Download file
api.dataset_download_file('andrewmvd/trip-advisor-hotel-reviews',
                          'tripadvisor_hotel_reviews.csv')

# Read data to pandas data frame
df = pd.read_csv('tripadvisor_hotel_reviews.csv.zip')

# Extract values using the lambda function in combination with regex.
# The pattern has no word boundaries, so it also matches inside longer
# words (e.g. "good" in "goodness").
pattern = "(great|excellent|good|beautiful)"
df['Tokens'] = df['Review'].apply(lambda x: re.findall(pattern, x))
# Number of matched tokens per review
df['Length'] = df['Tokens'].apply(len)

# Correlation of Rating with Length
print(df[['Rating', 'Length']].corr())

# Show data
df

          Rating    Length
Rating  1.000000  0.173446
Length  0.173446  1.000000


Unnamed: 0,Review,Rating,Tokens,Length
0,nice hotel expensive parking got good deal sta...,4,"[good, great]",2
1,ok nothing special charge diamond member hilto...,2,"[good, good, great, good, good]",5
2,nice rooms not 4* experience hotel monaco seat...,3,"[good, great, great, great]",4
3,"unique, great stay, wonderful time hotel monac...",5,"[great, excellent, great, great, excellent]",5
4,"great stay great stay, went seahawk game aweso...",5,"[great, great, great, great, great, good]",6
...,...,...,...,...
20486,"best kept secret 3rd time staying charm, not 5...",5,[great],1
20487,great location price view hotel great quick pl...,4,"[great, great, great, great]",4
20488,"ok just looks nice modern outside, desk staff ...",2,[good],1
20489,hotel theft ruined vacation hotel opened sept ...,1,"[beautiful, excellent, great, good]",4
