# Python fundamentals part 1

## Input and output

In [27]:
print("Hello")

Hello


In [28]:
myfruit = input()

print(myfruit)




In [29]:
print(f"I like {myfruit}")
print("I like " + myfruit) # without f-string

I like 
I like 


## Data types

In [30]:
name = "Katrin"
name

'Katrin'

In [31]:
type(name)

str

In [32]:
age = 39.1 # float (Python has no separate "double" type)
number_of_children = 1 # int
loves_math = True # bool

In [33]:
name, age, number_of_children, loves_math # when we write like this we get a tuple!

('Katrin', 39.1, 1, True)

## f-strings

In [34]:
# an f-string converts each value with str() (the object's __str__ method),
# so a type object such as type(name) is rendered as <class 'str'>
print(f"type(name): {type(name)}") 
print(f"type(age): {type(age)}") 
print(f"type(loves_math): {type(loves_math)}") 
# the "=" specifier prints the expression text followed by the value's repr
print(f"{type(number_of_children) =}") 


type(name): <class 'str'>
type(age): <class 'float'>
type(loves_math): <class 'bool'>
type(number_of_children) =<class 'int'>


In [35]:
# worth remembering if we want to quickly understand our variables!
print(f"{name =}")

name ='Katrin'


## Collection types: list, tuple, set, dictionary

## list

In [36]:
interests = ["badminton", "yoga", "math", "programming"]
interests

['badminton', 'yoga', 'math', 'programming']

In [37]:
# in python it is possible to mix the types
person = ["Katrin", 39.1, 1, True]
person

['Katrin', 39.1, 1, True]

In [38]:
# append() adds the whole list as a single, nested element at the end of person
person.append(interests)

['Katrin', 39.1, 1, True, ['badminton', 'yoga', 'math', 'programming']]

In [40]:
# lists are mutable, it is possible to change them
person

['Katrin', 39.1, 1, True, ['badminton', 'yoga', 'math', 'programming']]

In [41]:
person.append(100)

In [43]:
person
# it is possible to add other objects into a list, you can have a list in a list

['Katrin', 39.1, 1, True, ['badminton', 'yoga', 'math', 'programming'], 100]

**Accessing items/elements in a list**

In [46]:
person

['Katrin', 39.1, 1, True, ['badminton', 'yoga', 'math', 'programming'], 100]

In [48]:
person[0], person[1], person[-1], person[-2]

('Katrin', 39.1, 100, ['badminton', 'yoga', 'math', 'programming'])

In [50]:
person[-2][1]

'yoga'

**Slicing list**

In [51]:
# slicing does not exist in many languages, but luckily in python it does
person

['Katrin', 39.1, 1, True, ['badminton', 'yoga', 'math', 'programming'], 100]

In [53]:
person[:2] # items from index 0 up to, but not including, index 2

['Katrin', 39.1]

In [55]:
person[0::2] # here I get every second element

['Katrin', 1, ['badminton', 'yoga', 'math', 'programming']]

In [56]:
interests, interests[::-1]

(['badminton', 'yoga', 'math', 'programming'],
 ['programming', 'math', 'yoga', 'badminton'])

**iterate over a list (loop over)**

In [57]:
# this is how you make a list if you come from another programming language
numbers = []
for i in range(10):
    numbers.append(i)

numbers

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [58]:
# a list comprehension is the more pythonic way to build a list from a loop
# (for a plain copy of a range, list(range(10)) is even shorter)
numbers = [i for i in range(10)]
numbers

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [59]:
squares = [i**2 for i in numbers]
squares

[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]

In [60]:
for square in squares:
    print(square)

0
1
4
9
16
25
36
49
64
81


In [62]:
# tuple - immutable collection
# set - unique collection of values

## Dictionary
- key: value (pair)

In [63]:
# one way is to use a dict() function
person_dict_syntax = dict(name = "Katrin", age = 39.1)
person_dict_syntax

{'name': 'Katrin', 'age': 39.1}

In [66]:
# another way is to use the curly-brace literal syntax
person_dict_bracket_syntax = {'name': 'Katrin', 'age': 39.1}
person_dict_bracket_syntax.keys(), person_dict_syntax.keys()

(dict_keys(['name', 'age']), dict_keys(['name', 'age']))

In [68]:
person_dict_syntax["name"], person_dict_syntax["age"]

('Katrin', 39.1)

In [72]:
# the two dictionaries have different memory addresses, so they are not the same object,
# even though they have the same content
hex(id(person_dict_bracket_syntax)), hex(id(person_dict_syntax))

('0x17a731fb180', '0x17a7315cd40')

In [73]:
# we can iterate through a dictionary; iterating yields its keys
for key in person_dict_syntax:
    print(key)

name
age


In [74]:
# while iterating through the keys we can also look up each value
for key in person_dict_syntax:
    print(key, person_dict_syntax[key])

name Katrin
age 39.1


In [76]:
# we can get the keys and values in a more pythonic way
for key, value in person_dict_syntax.items(): # items() gives each key and value as a tuple
    print(key, value)

name Katrin
age 39.1


**dictionary comprehension**

In [81]:
# build a lowercase {term: explanation} dictionary from two parallel lists
ml_terms = [
    "Supervised Learning",
    "Unsupervised Learning",
    "Feature Engineering",
    "Overfitting",
    "Cross-Validation"
]


ml_explanations = [
    "Learns from labeled data.",
    "Finds patterns in unlabeled data.",
    "Enhances input features for models.",
    "Model fits training data too closely.",
    "Evaluates model performance robustly."
]
# zip pairs the 1st element of the first list with the 1st element of the second list, and so on
{term.lower(): explanation.lower() for term, explanation in zip(ml_terms, ml_explanations)} 

{'supervised learning': 'learns from labeled data.',
 'unsupervised learning': 'finds patterns in unlabeled data.',
 'feature engineering': 'enhances input features for models.',
 'overfitting': 'model fits training data too closely.',
 'cross-validation': 'evaluates model performance robustly.'}

## Strings
- important to understand how to manipulate strings as we will clean a lot of data in our work

In [83]:
# oops weird formatting 
ml_term = "sUperVised   Learning  "
ml_term

'sUperVised   Learning  '

In [85]:
len(ml_term)

23

In [89]:
# strip() returns a new string without the leading and trailing spaces;
# the spaces inside the string are kept and ml_term itself is not modified
ml_term.strip()
len(ml_term.strip())

21

In [91]:
ml_term.split()

['sUperVised', 'Learning']

In [96]:
# join() joins each element of the list with one space in between
ml_term_cleaned = " ".join(ml_term.split()).lower()
ml_term_cleaned

'supervised learning'

In [99]:
# note: strings are sequences of characters, so indexing and slicing work on them just like on lists
ml_term_cleaned[0], ml_term_cleaned[-1], ml_term_cleaned[4:7]

('s', 'g', 'rvi')

In [101]:
# concatenation = you can add strings together
"hello" + " world"

'hello world'

In [103]:
# + operator is overloaded, it will act differently depending on the types
5+5, "5"+"5" 

(10, '55')

In [104]:
"5"*5

'55555'

## if-statement

In [106]:
predicted_probability = 0.8

if predicted_probability > .5:
    y_pred = 1
else:
    y_pred = 0

y_pred

1

In [108]:
# one line if-else statement (conditional expression)
predicion = "positive" if y_pred else "negative" # any truthy y_pred gives "positive"; for numbers that means non-zero
predicion

'positive'

In [109]:
# one line if-else statement (conditional expression)
y_pred = 5 # any non-zero number is truthy, not only 1
predicion = "positive" if y_pred else "negative" # any truthy y_pred gives "positive"; for numbers that means non-zero
predicion

'positive'

In [110]:
# one line if-else statement (conditional expression)
y_pred = 0 # 0 is falsy, so this changes the outcome to "negative"

predicion = "positive" if y_pred else "negative" # any truthy y_pred gives "positive"; for numbers that means non-zero

predicion

'negative'

In [111]:
# if-elif-else

accuracy = .6

if accuracy > 0.9:
    model_performance = "Good"
elif accuracy > 0.7:
    model_performance = "Moderate"
else:
    model_performance = "Bad"

accuracy, model_performance

(0.6, 'Bad')

## while statement
- iterative control structure
- iteratively repeat execution as long as specified condition remains True
- can be seen as an "if-loop"

#### Example - oil leakage
There is an oil leakage causing the bird population on an island to halve each year. At the start there were 80000 birds. How many years does it take until at most 1/10 of them remain?

In [115]:
# Halve the bird population once per year and count the years until at most
# 1/10 of the initial population remains.
initial_birds = 80000
birds = initial_birds
year = 0

# derive the target from the starting population instead of hard-coding 8000
threshold = initial_birds / 10

while birds > threshold:
    print(f"Year {year} there were {birds:.0f}")
    birds *= 0.5  # birds becomes a float from the first halving onwards
    year += 1

print(f"It takes {year} years for the birds to have 1/10 birds remaining")


Year 0 there were 80000
Year 1 there were 40000
Year 2 there were 20000
Year 3 there were 10000
It takes 4 years for the birds to have 1/10 birds remaining


## for statement
- iterative control structure
- iterates over a sequence (list, tuple, string, range, ...)
- typically used when you know number of iterations beforehand

In [116]:
unsupervised_abbreviations = [
    "K-Means",
    "PCA",
    "GMM",
]

unsupervised_algorithms = [
    "K-Means Clustering",
    "Principal Component Analysis",
    "Gaussian Mixture Model",
]

for abbreviation in unsupervised_abbreviations:
    print(abbreviation)

K-Means
PCA
GMM


In [118]:
for abbreviation, algorithm in zip(unsupervised_abbreviations, unsupervised_algorithms):
    print(abbreviation,": ", algorithm)

K-Means :  K-Means Clustering
PCA :  Principal Component Analysis
GMM :  Gaussian Mixture Model


In [126]:
# works, but indexing via range(len(...)) is not pythonic - avoid it
for i in range(len(unsupervised_abbreviations)):
    print(f"abbreviation {i}: {unsupervised_abbreviations[i]}")

print()

# pythonic (idiomatic) way: enumerate gives the index together with the item
for i, abbreviation in enumerate(unsupervised_abbreviations):
    print(f"abbreviation {i}: {abbreviation}")

print()

# the optional second argument of enumerate sets the starting index (here 5)
for i, abbreviation in enumerate(unsupervised_abbreviations,5):
    print(f"abbreviation {i}: {abbreviation}")

abbreviation 0: K-Means
abbreviation 1: PCA
abbreviation 2: GMM

abbreviation 0: K-Means
abbreviation 1: PCA
abbreviation 2: GMM

abbreviation 5: K-Means
abbreviation 6: PCA
abbreviation 7: GMM
