In [None]:
import numpy as np
import pandas as pd

# Report the versions of the libraries this notebook was run with,
# so results can be reproduced later.
for banner, module in (("Numpy version:", np), ("Pandas version:", pd)):
    print(banner, module.__version__)

# Python

### List
- Ordered, mutable, allows duplicates
- Allows elements of different types in the same list

https://www.w3schools.com/python/python_lists.asp

In [None]:
# Build two lists: one through the list() constructor (equivalent to the
# literal [1, 2, 3]) and one literal mixing several element types.
l = list([1, 2, 3])
l2 = ['test', 12, True, 3.14, 12]

# Both variables are plain Python lists.
print(F"{type(l)}")
print(F"{type(l2)}")

# Lists are mutable: overwrite a slot, grow the list, drop a value.
l[0] = 10      # replace the first element
l.append(4)    # add to the end
l.remove(2)    # remove the first occurrence of the value 2

# Number of elements in each list.
print(f"Length of l: {len(l)}")
print(f"Length of l2: {len(l2)}")

# Walk through each list in turn and show every element with its type.
for values in (l, l2):
    for item in values:
        print(f"{item} is of type {type(item)}")

### Tuple
- Ordered, immutable, allows duplicates
- Allows elements of different types in the same tuple

https://www.w3schools.com/python/python_tuples.asp

In [None]:
# Create a tuple with a literal; tuple([...]) on a list gives the same result.
t = (1, 'test', 3.14, False)

# Tuples are immutable: uncommenting the next line raises a TypeError.
#t[0] = 10

# Show the container type.
print(F"{type(t)}")


### Dictionaries

- Key-value pairs, ordered (Python 3.7+), mutable
- Keys are unique

https://www.w3schools.com/python/python_dictionaries.asp

In [None]:
# instantiating dictionaries
# NOTE: the variable is intentionally NOT named `dict`. Binding that name would
# shadow the builtin type for the rest of the kernel session and break any
# later `dict(...)` call or `isinstance(x, dict)` check.
sample_dict = {
    'key1': 'value1',
    'key2': 42
    }

# printing type of dictionary
print(F"{type(sample_dict)}")

# mutating dictionary: add a new key and overwrite an existing one
sample_dict['key3'] = 3.14
sample_dict['key1'] = 'new_value1'

# duplicate keys test
# sample_dict['key2'] = 'another_value'  # This will overwrite the previous value for 'key2'

# printing dictionary length
print(f"Length of dict: {len(sample_dict)}")

# iterating through dictionary as (key, value) pairs
for key, value in sample_dict.items():
    print(f"{key}: {value} (type: {type(value)})")

# NumPy

### NumPy Array

- Allows only ***one*** type in the array (all elements share the same dtype)

https://www.w3schools.com/python/numpy/default.asp

In [None]:
# Create a one-dimensional integer array.
npa = np.array([1, 2, 5, 4, 3])
# Mixing in a string would coerce every element to a common type (str):
#npa = np.array([1, 2, 5, 4, 'test'])

# Show the container type.
print(F"{type(npa)}")

# Element assignment works in place ...
npa[0] = 10
# ... whereas append and delete return new arrays, so the result is rebound:
# first add 6 at the end, then drop the element at index 2.
npa = np.delete(np.append(npa, 6), 2)

# Number of elements in the array.
print(f"Length of npa: {npa.size}")

# Whole-array display.
print(npa)

# Element-by-element display, with the type of each element.
for item in npa:
    print(f"{item} is of type {type(item)}")


#### Typical loops

In [None]:
import numpy as np

# Fresh array for the loop examples.
npa = np.array([1, 2, 5, 4, 3])

# Visit every position together with its element.
for i, value in enumerate(npa):
    print(f"Index {i}: {value} (type: {type(value)})")

# Visit only the first three positions (indices 0, 1 and 2).
for i, value in enumerate(npa[:3]):
    print(f"Index {i}: {value} (type: {type(value)})")

#### List comprehension

In [None]:
# Source data for the comprehension examples.
npa = np.array([1, 2, 5, 4, 3])

# Map: square every element.
squared_list = [value ** 2 for value in npa]
print(f"Squared list: {squared_list}")

# Filter: keep only the elements divisible by two.
even_list = [value for value in npa if not value % 2]
print(f"Even numbers list: {even_list}")

# Map to pairs: (number, square) for every element.
tuple_list = [(value, value ** 2) for value in npa]
print(f"List of tuples (number, square): {tuple_list}")

# Build one descriptive string per element, then print one per line.
type_list = [f"{value} is of type {type(value)}" for value in npa]
print(*type_list, sep="\n")

#### Vectorized Operations

In [None]:
# Source data for the vectorized examples.
npa = np.array([1, 2, 5, 4, 3, 12])

# Sort in ascending order (in place).
npa.sort()
print(f"Sorted numpy array: {npa}")

# Square every element in a single whole-array operation (no Python loop).
squared_npa = np.square(npa)
print(f"Squared numpy array: {squared_npa}")

# Boolean mask marking elements above 10, then use it to select them.
above_ten = npa > 10
filtered_npa = npa[above_ten]
print(f"Filtered numpy array (values > 10): {filtered_npa}")

# Pandas - Series

- Named indices (access by label, not just position)

https://www.w3schools.com/python/pandas/pandas_series.asp

In [None]:
# Build a labelled Series: the dict keys become the index labels.
serie = pd.Series({'a': 10, 'b': 20, 'c': 30, 'd': 40, 'e': 50})
# Mixed element types are allowed, but the Series then gets dtype `object`:
# serie = pd.Series([10, 20, 'test', 40, 50])
print(serie)

# Three ways to look an element up by its label.
print(f"Element with label 'c': {serie['c']}")      # square-bracket lookup
print(f"Element with label 'a': {serie.a}")         # attribute-style lookup
print(f"Element with label 'e': {serie.get('e')}")  # .get() method
# Positional lookup through plain [] is deprecated; prefer .iloc (shown below):
#print(f"Element with label 'b': {serie[1]}")

# Explicit label-based indexing.
print(f"Element with label 'd': {serie.loc['d']}")

# Explicit position-based indexing.
print(f"Element at position 2: {serie.iloc[2]}")

# Arithmetic aligns on index labels, not on position; labels present in only
# one operand produce NaN in the result.
sales_q1 = pd.Series({'Alice': 250, 'Bob': 300, 'Charles': 400})
sales_q2 = pd.Series({'John': 200, 'Bob': 350, 'Alice': 450})
total_sales = sales_q1 + sales_q2
print("Total Sales:\n", total_sales)

# Same addition, but a label missing from one side is treated as 0
# instead of yielding NaN.
total_sales = sales_q1.add(sales_q2, fill_value=0)
print("Total Sales (with NaN handled):\n", total_sales)


# Pandas - DataFrame

- Tabular data (rows and columns)
- Column names and index names (both are also referred to as 'labels')

In [None]:
# Option 1: build the DataFrame column by column from a dict
# (key = column name, value = list of column values).
df = pd.DataFrame({
    'Name': ['Alice', 'Bob', 'Charlie', 'David'],
    'Age': [25, 30, 35, 40],
    'City': ['New York', 'Los Angeles', 'Chicago', 'Houston'],
    'Sales_Q1': [250, 300, 400, 500],
    'Sales_Q2': [200, 350, 450, 550]
})
#df.index = df['Name']
#print(df)

# Option 2: build the DataFrame row by row, passing the column names
# separately. This replaces the frame created above.
records = [
    ['Alice', 25, 'New York', 250, 200],
    ['Bob', 30, 'Los Angeles', 300, 350],
    ['Charlie', 35, 'Chicago', 400, 450],
    ['David', 40, 'Houston', 500, 550]
]
df = pd.DataFrame(records, columns=['Name', 'Age', 'City', 'Sales_Q1', 'Sales_Q2'])

# Use the names as row labels while keeping 'Name' as a regular column too.
df = df.set_index('Name', drop=False)
print(df)

# Append a row: wrap it in a one-row DataFrame with matching columns and
# its own label, then concatenate.
new_row = pd.DataFrame(
    [['Eve', 28, 'Miami', 320, 370]],
    index=['Eve'],
    columns=df.columns,
)
df = pd.concat([df, new_row])

# Removing a row by label would look like this:
#df = df.drop('Bob')

# A single column comes back as a Series.
print("Age column:\n", df['Age'])

# One row selected by its label.
print("Row for Charlie:\n", df.loc['Charlie'])

# One row selected by its position.
print("First row:\n", df.iloc[0])

# One cell selected by (row label, column label).
print("Sales_Q2 for David:", df.loc['David', 'Sales_Q2'])

# Derived column: element-wise sum of both quarters.
df['Total_Sales'] = df['Sales_Q1'].add(df['Sales_Q2'])
print("DataFrame with Total_Sales:\n", df)

# Keep only the rows whose total exceeds 800.
high_sales = df.loc[df['Total_Sales'].gt(800)]
print("Rows with Total_Sales > 800:\n", high_sales)

# The condition on its own is a boolean Series (one True/False per row) ...
over_800_sales = df['Total_Sales'].gt(800)
print("Boolean Series for Total_Sales > 800:\n", over_800_sales)

# ... which can be stored and reused as a row filter.
filtered_df = df.loc[over_800_sales]
print("Filtered DataFrame using boolean Series:\n", filtered_df)


#### Slicing

In [None]:
# Label-based slice: rows 'Alice' through 'David' (the stop label IS included),
# restricted to the 'Name' and 'Total_Sales' columns.
alice_to_david = slice('Alice', 'David')
subset = df.loc[alice_to_david, ['Name', 'Total_Sales']]
print("Subset of DataFrame:\n", subset) # note that the stop label is inclusive

# Position-based slice: rows at positions 0, 1, 2 (the stop position is NOT
# included) and the columns at positions 0 and 5.
subset_pos = df.iloc[:3, [0, 5]]
print("Subset of DataFrame by position:\n", subset_pos) #note that the stop index is exclusive

# Mixed selection: rows by label from 'Bob' through 'Eve' (inclusive), columns
# by position 3 and 4 (the stop position 5 is excluded).
subset_col = df.loc['Bob':'Eve', df.columns[3:5]]
print("Subset of DataFrame by columns:\n", subset_col)
