In [None]:
# Store data in a tabular form

In [5]:
# List of lists

# Example of a table with 4 columns: the first inner list is the header row,
# and every following inner list is one record in the same column order.
table = [
    ["Name", "Age", "City", "Occupation"],
    ["Alice", 30, "New York", "Engineer"],
    ["Bob", 25, "Los Angeles", "Artist"],
    ["Charlie", 35, "Chicago", "Teacher"],
]

print(table)

[['Name', 'Age', 'City', 'Occupation'], ['Alice', 30, 'New York', 'Engineer'], ['Bob', 25, 'Los Angeles', 'Artist'], ['Charlie', 35, 'Chicago', 'Teacher']]


In [7]:
# The 4-column table as a list of dicts: one dict per record,
# keyed by column name.
table = [
    dict(Name="Alice", Age=30, City="New York", Occupation="Engineer"),
    dict(Name="Bob", Age=25, City="Los Angeles", Occupation="Artist"),
    dict(Name="Charlie", Age=35, City="Chicago", Occupation="Teacher"),
]

table

[{'Name': 'Alice', 'Age': 30, 'City': 'New York', 'Occupation': 'Engineer'},
 {'Name': 'Bob', 'Age': 25, 'City': 'Los Angeles', 'Occupation': 'Artist'},
 {'Name': 'Charlie', 'Age': 35, 'City': 'Chicago', 'Occupation': 'Teacher'}]

In [1]:
from collections import namedtuple

# Record type: an immutable tuple whose fields can be read by name
Row = namedtuple("Row", "Name Age City Occupation")

# The table is a list of Row records
table = [
    Row(Name="Alice", Age=30, City="New York", Occupation="Engineer"),
    Row(Name="Bob", Age=25, City="Los Angeles", Occupation="Artist"),
    Row(Name="Charlie", Age=35, City="Chicago", Occupation="Teacher"),
]

# Show every record, one per line
print(*table, sep="\n")


Row(Name='Alice', Age=30, City='New York', Occupation='Engineer')
Row(Name='Bob', Age=25, City='Los Angeles', Occupation='Artist')
Row(Name='Charlie', Age=35, City='Chicago', Occupation='Teacher')


In [9]:
# A table held in a pandas DataFrame, built from column-oriented data

import pandas as pd

# Each keyword is a column name; each list holds that column's values in row order
data = dict(
    Name=["Alice", "Bob", "Charlie"],
    Age=[30, 25, 35],
    City=["New York", "Los Angeles", "Chicago"],
    Occupation=["Engineer", "Artist", "Teacher"],
)

df = pd.DataFrame(data=data)

Index: 0
Name: Alice, Age: 30, City: New York

Index: 1
Name: Bob, Age: 25, City: Los Angeles

Index: 2
Name: Charlie, Age: 35, City: Chicago




### iterrows(): Provides an iterator yielding index and row as a Series. Good for simple row access.
### itertuples(): Provides an iterator yielding named tuples. Faster and more memory-efficient.
### apply(): Useful for applying functions row-wise, though not always the most intuitive for basic iteration.
### DataFrame.loc[]: Can be used to access specific rows, but generally less efficient for iteration.

When choosing the best method for iterating through rows in a Pandas DataFrame, several factors need to be considered, including performance, readability, and use case. Here’s a breakdown of each method in terms of these factors:

### 1. Performance
#### a. iterrows()
    Performance: Slower compared to itertuples() because it returns a Series for each row, which involves overhead in constructing Series objects.
    Use Case: Useful for simple cases where row data needs to be accessed as a Series.

#### b. itertuples()
    Performance: Generally faster and more memory-efficient than iterrows(). It returns named tuples which are faster to construct and access.
    Use Case: Suitable for most scenarios where you need to iterate over rows without needing the extra overhead of Series objects.

#### c. apply()
    Performance: Can be less efficient for large DataFrames because it applies a function to each row or column. The overhead of calling a Python function for each row can be significant.
    Use Case: Ideal for applying functions or transformations across rows or columns, especially if the function itself is vectorized or optimized.

#### d. DataFrame.loc[]
    Performance: Can be inefficient for row-wise iteration due to repeated access to DataFrame rows by index. This method is not typically recommended for iterating over all rows.
    Use Case: Useful for accessing specific rows or performing operations based on index, but less efficient for general iteration.

### 2. Readability
#### a. iterrows()
    Readability: Generally more readable and intuitive, especially for those new to Pandas. Returns rows as Series, making it easy to work with individual columns by name.

#### b. itertuples()
    Readability: Slightly less intuitive because it returns named tuples, but still straightforward. Field names are accessible as attributes of the tuple, making it relatively easy to work with.

#### c. apply()
    Readability: Can be less readable if the function being applied is complex. However, it can simplify operations when a function is clearly defined.

#### d. DataFrame.loc[]
    Readability: Clear when used for accessing specific rows or columns by index, but not ideal for iteration due to the need to repeatedly access rows.

### 3. Use Case
#### a. iterrows()
    Use Case: Best for scenarios where row-by-row manipulation or inspection is needed and performance is not a critical issue.

#### b. itertuples()
    Use Case: Preferred for most iteration needs due to its balance of performance and ease of use. Suitable for large DataFrames.

#### c. apply()
    Use Case: Useful for applying functions or transformations to rows or columns, particularly if the function is vectorized or efficient.

#### d. DataFrame.loc[]
    Use Case: Best used for accessing specific rows or columns, not for general iteration. Suitable for operations where specific indices are needed.

#### Summary of Recommendations
    For Performance: Use itertuples() for general iteration, as it is the fastest and most memory-efficient of the four methods compared here.
    For Readability: iterrows() is often the most readable and intuitive for simple use cases.
    For Function Application: Use apply() when you need to apply a function across rows or columns.
    For Specific Row Access: Use DataFrame.loc[] for direct row or column access based on index.

In [17]:
# 1. Using iterrows()
# iterrows() yields an (index, row) pair for every row of the DataFrame, with the
# row delivered as a pandas Series so values can be looked up by column name.

# Loop through each row using iterrows()
for idx, record in df.iterrows():
    print(f"Index: {idx}")
    print(f"Name: {record['Name']}, Age: {record['Age']}, City: {record['City']}")
    print()




Index: 0
Name: Alice, Age: 30, City: New York

Index: 1
Name: Bob, Age: 25, City: Los Angeles

Index: 2
Name: Charlie, Age: 35, City: Chicago



In [15]:
# 2. Using itertuples()
# itertuples() yields one named tuple per row. It is generally faster than iterrows()
# and more convenient when the row is not needed as a Series.

import pandas as pd

# Sample DataFrame
df = pd.DataFrame(
    dict(
        Name=["Alice", "Bob", "Charlie"],
        Age=[30, 25, 35],
        City=["New York", "Los Angeles", "Chicago"],
    )
)

# Walk the rows; the index label is available as the tuple's `Index` field
for record in df.itertuples():
    print(f"Index: {record.Index}")
    print(f"Name: {record.Name}, Age: {record.Age}, City: {record.City}")
    print()


Index: 0
Name: Alice, Age: 30, City: New York

Index: 1
Name: Bob, Age: 25, City: Los Angeles

Index: 2
Name: Charlie, Age: 35, City: Chicago



In [None]:
# Using apply() with a function
# apply() calls a function once per row (axis=1). It is normally used for operations
# or transformations, but it can also be used simply to visit every row.

import pandas as pd

# Sample DataFrame
df = pd.DataFrame(
    dict(
        Name=["Alice", "Bob", "Charlie"],
        Age=[30, 25, 35],
        City=["New York", "Los Angeles", "Chicago"],
    )
)

# Row handler used to visit each row
def process_row(row):
    """Print one row's Name, Age and City on a single line, then a blank line.

    Args:
        row: Any object that can be indexed with the keys 'Name', 'Age'
            and 'City'.

    Returns:
        None. The function only prints.
    """
    summary = f"Name: {row['Name']}, Age: {row['Age']}, City: {row['City']}"
    print(summary)
    print()

# Apply function to each row. process_row only prints and returns None, so the
# trailing semicolon stops the notebook from also displaying a Series of None values.
df.apply(process_row, axis=1);

In [None]:
# Using DataFrame.loc[]
# DataFrame.loc[] selects rows by index label, so it can be used inside a loop to fetch
# one row at a time. This is generally less efficient than the other methods for
# iterating through rows.

import pandas as pd

# Sample DataFrame
df = pd.DataFrame({
    "Name": ["Alice", "Bob", "Charlie"],
    "Age": [30, 25, 35],
    "City": ["New York", "Los Angeles", "Chicago"]
})

# Loop through each row by index label. .loc is label-based, so iterating over
# df.index (rather than range(len(df))) keeps working when the labels are not 0..n-1.
for label in df.index:
    row = df.loc[label]
    print(f"Name: {row['Name']}, Age: {row['Age']}, City: {row['City']}")
    print()
