# Lab 12 (Handling CSV Files)

# Reading a CSV file

In [1]:
import pandas as pd

# Location of the dataset. The r"..." prefix makes this a raw string, so the
# Windows backslashes are kept literally instead of being read as escapes.
file_path = r"D:\Apoo\Study materials\Food.csv"

# Load the whole file into a DataFrame, then preview its first five rows
df = pd.read_csv(file_path)
print("Read CSV:\n", df.head(n=5))

Read CSV:
       Food    Rich_in  Calories
0  Almonds    Protein       576
1  Spinach       Iron        23
2   Salmon    Omega-3       208
3   Banana  Potassium        89
4     Eggs    Protein       155


# Extracting the contents of a CSV file into a pandas DataFrame

In [2]:
# Show every row of the DataFrame under a labelled header
# (sep=" " is print's default separator, spelled out here for clarity)
print("\nDataFrame contents:\n", df, sep=" ")


DataFrame contents:
              Food       Rich_in  Calories
0         Almonds       Protein       576
1         Spinach          Iron        23
2          Salmon       Omega-3       208
3          Banana     Potassium        89
4            Eggs       Protein       155
5         Avocado  Healthy Fats       160
6      Brown Rice         Fiber       111
7         Carrots     Vitamin A        41
8            Milk       Calcium        42
9  Dark Chocolate  Antioxidants       546


# Appending into a CSV

In [11]:
# New foods to add to the CSV file, written as one record (dict) per row
new_data = pd.DataFrame([
    {"Food": "Oats", "Rich_in": "Fiber", "Calories": 68},
    {"Food": "Walnuts", "Rich_in": "Omega-3", "Calories": 654},
])

# Append the new rows to the existing CSV file:
#   mode="a"      -> append to the end of the file instead of overwriting it
#   index=False   -> do not write the DataFrame index (0, 1, 2, ...);
#                    otherwise an extra column of row numbers would appear
#   header=False  -> do not write the column names a second time
new_data.to_csv(file_path, mode="a", index=False, header=False)


In [15]:
# Re-read the file from disk and show its last five rows to confirm
# that the new values were appended
df = pd.read_csv(r"D:\Apoo\Study materials\Food.csv")
print(df.tail(n=5))

              Food       Rich_in  Calories
7          Carrots     Vitamin A        41
8             Milk       Calcium        42
9   Dark Chocolate  Antioxidants       546
10            Oats         Fiber        68
11         Walnuts       Omega-3       654


# Reading a CSV Chunk-by-chunk

In [21]:
# Stream the file two rows at a time instead of loading everything at once.
# With chunksize set, read_csv returns a TextFileReader (printed first) that
# yields one small DataFrame per iteration.
with pd.read_csv(file_path, chunksize=2) as reader:
    print(reader)
    for chunk in reader:
        # each chunk is a DataFrame holding the next (up to) two rows
        print(chunk)

<pandas.io.parsers.readers.TextFileReader object at 0x000001FBEED561A0>
      Food  Rich_in  Calories
0  Almonds  Protein       576
1  Spinach     Iron        23
     Food    Rich_in  Calories
2  Salmon    Omega-3       208
3  Banana  Potassium        89
      Food       Rich_in  Calories
4     Eggs       Protein       155
5  Avocado  Healthy Fats       160
         Food    Rich_in  Calories
6  Brown Rice      Fiber       111
7     Carrots  Vitamin A        41
             Food       Rich_in  Calories
8            Milk       Calcium        42
9  Dark Chocolate  Antioxidants       546
       Food  Rich_in  Calories
10     Oats    Fiber        68
11  Walnuts  Omega-3       654


In [32]:
# Open the file lazily (iterator=True) and pull just the first five rows from it
with pd.read_csv(r"D:\Apoo\Study materials\Food.csv", iterator=True) as reader:
    first_rows = reader.get_chunk(5)
    print(first_rows)

      Food    Rich_in  Calories
0  Almonds    Protein       576
1  Spinach       Iron        23
2   Salmon    Omega-3       208
3   Banana  Potassium        89
4     Eggs    Protein       155


# Writing numeric data into a CSV File

In [None]:
import pandas as pd

file_path = r"D:\Apoo\Study materials\Food.csv"

df = pd.read_csv(file_path)

# Add a numeric column holding 10, 20, 30, ... (one value per row).
# The values are generated from the current row count rather than typed out as
# a fixed 12-element list: assigning a list whose length differs from the
# number of rows raises ValueError, which would happen as soon as rows are
# appended to (or removed from) the file and this cell is run again.
df["Carbs"] = [10 * (row_no + 1) for row_no in range(len(df))]

# Save back into the same CSV (index=False keeps the row labels out of the file)
df.to_csv(file_path, index=False)

In [36]:
# Announce the update, then show the last five rows to confirm it
print("Numeric columns added into Food.csv", df.tail(), sep="\n")

Numeric columns added into Food.csv
              Food       Rich_in  Calories  Carbs
7          Carrots     Vitamin A        41     80
8             Milk       Calcium        42     90
9   Dark Chocolate  Antioxidants       546    100
10            Oats         Fiber        68    110
11         Walnuts       Omega-3       654    120


# Writing text data into a CSV File

In [40]:
quote = '"One cannot think well, love well, sleep well, if one has not dined well." - Virginia Woolf'

# The path is a raw string (r"..."): in a normal string literal "\A", "\S" and
# "\F" are invalid escape sequences, which current Python versions warn about.
# Mode "a" appends after the existing rows; the leading "\n" puts the quote on
# a line of its own. The quote is free text rather than a full CSV record, so
# pandas will read it back as a row with missing values in the other columns.
with open(r"D:\Apoo\Study materials\Food.csv", "a", encoding="utf-8") as f:
    f.write("\n" + quote)


In [43]:
# Reload the file and look at the last five rows, where the appended text shows up
df = pd.read_csv(r"D:\Apoo\Study materials\Food.csv")
print(df.tail(n=5))

                                                 Food       Rich_in  Calories  \
8                                                Milk       Calcium      42.0   
9                                      Dark Chocolate  Antioxidants     546.0   
10                                               Oats         Fiber      68.0   
11                                            Walnuts       Omega-3     654.0   
12  One cannot think well, love well, sleep well, ...           NaN       NaN   

    Carbs  
8    90.0  
9   100.0  
10  110.0  
11  120.0  
12    NaN  


In [45]:
# Read the file as plain text and pick out its final line (the text added earlier)
with open(r"D:\Apoo\Study materials\Food.csv", mode="r", encoding="utf-8") as f:
    lines = list(f)  # one string per line, line endings included

# strip() drops the surrounding whitespace / trailing newline
last_line = lines[-1].strip()
print(last_line)

"One cannot think well, love well, sleep well, if one has not dined well." - Virginia Woolf


# To look for something in a file

In [1]:
# Raw string (r"...") so the Windows backslashes are kept literally: in a normal
# string literal "\A", "\S" and "\F" are invalid escape sequences, which
# current Python versions warn about.
file_path = r"D:\Apoo\Study materials\Food.csv"
search_word = "Banana"

# Scan the file as plain text, one line at a time (line numbers start at 1),
# and report every line that contains the search word as a substring.
with open(file_path, "r", encoding="utf-8") as f:
    for line_no, line in enumerate(f, start=1):
        if search_word in line:
            print(f"Found '{search_word}' in line {line_no}: {line.strip()}")

Found 'Banana' in line 5: Banana,Potassium,89.0,40.0


In [4]:
# Look for values inside a CSV using pandas
import pandas as pd

# Raw string (r"...") so the Windows backslashes are kept literally: in a normal
# string literal "\A", "\S" and "\F" are invalid escape sequences, which
# current Python versions warn about.
df = pd.read_csv(r"D:\Apoo\Study materials\Food.csv")

# Boolean mask: keep only the rows whose "Food" value is exactly "Banana"
result = df[df["Food"] == "Banana"]
print(result)

     Food    Rich_in  Calories  Carbs
3  Banana  Potassium      89.0   40.0


# Some interesting Functions 

In [5]:
# Filter rows with a condition: keep only the foods with more than 200 calories
is_high_cal = df["Calories"].gt(200)
high_cal = df.loc[is_high_cal]
print(high_cal)

              Food       Rich_in  Calories  Carbs
0          Almonds       Protein     576.0   10.0
2           Salmon       Omega-3     208.0   30.0
9   Dark Chocolate  Antioxidants     546.0  100.0
11         Walnuts       Omega-3     654.0  120.0


In [7]:
# Handling missing values
# Raw string (r"...") so the Windows backslashes are kept literally: in a normal
# string literal "\A", "\S" and "\F" are invalid escape sequences, which
# current Python versions warn about.
df = pd.read_csv(r"D:\Apoo\Study materials\Food.csv")

# Replace missing values with "Unknown" in the non-numeric columns only.
# Writing the string "Unknown" into a numeric column would turn the whole
# column into mixed text/number data and break numeric operations such as
# df["Calories"] > 200, so numeric gaps are left as NaN.
text_cols = df.select_dtypes(exclude="number").columns
df = df.fillna({col: "Unknown" for col in text_cols})

In [8]:
# Process millions of rows without exhausting memory by reading the file in chunks:
# for chunk in pd.read_csv("bigData.csv", chunksize=10000):
#     print(chunk["Calories"].mean())


In [12]:
# Conversion while reading: a converter function is applied to the raw text of
# every "Calories" field as the file is parsed.
KJ_PER_KCAL = 4.184  # conversion factor from kilocalories to kilojoules


def kcal_to_kj(raw):
    """Convert one raw "Calories" field (text) from kcal to kJ.

    Blank fields become NaN instead of raising ValueError from float(""),
    so rows without a calorie value (such as a free-text line appended to
    the file) do not abort the whole read.
    """
    raw = raw.strip()
    return float(raw) * KJ_PER_KCAL if raw else float("nan")


df = pd.read_csv(r"D:\Apoo\Study materials\Food.csv", converters={"Calories": kcal_to_kj})
print(df.head())

      Food    Rich_in  Calories  Carbs
0  Almonds    Protein  2409.984   10.0
1  Spinach       Iron    96.232   20.0
2   Salmon    Omega-3   870.272   30.0
3   Banana  Potassium   372.376   40.0
4     Eggs    Protein   648.520   50.0
