In [34]:
import pandas as pd
import sys

# Working with DataFrames and Series in Pandas

## Common DataFrame Operations

1. **Reading Data**
   - Use `pd.read_csv()` with appropriate parameters
   - Check file separator (comma, tab, pipe, etc.)
   - Verify column names and data types

2. **Accessing Data**
   - Columns can be accessed using `df['column_name']`
   - Returns a Pandas Series object
   - Can be converted to Python list using `.to_list()`

3. **Basic Analysis**
   - Check data shape using `df.shape`
   - View column names using `df.columns`
   - Convert to different formats using `to_dict()`, `to_list()`, etc.

4. **Best Practices**
   - Always verify data after reading
   - Check for missing values
   - Confirm data types are correct

In [43]:
# Read the CSV file with correct separator
df = pd.read_csv("Data/Weather_data.csv")  # Remove sep="|" as the file uses commas
print("First few rows of the data:")
# print(df.head())
print(df)
print("\nDataFrame Type:")
print(type(df))

First few rows of the data:
         day  temp Condition
0     Monday    12     Sunny
1    Tuesday    14      Rain
2  Wednesday    12      Rain
3   Thursday    18    Cloudy
4     Friday    20     Sunny
5   Saturday    22     Sunny
6     Sunday    24     Sunny

DataFrame Type:
<class 'pandas.core.frame.DataFrame'>


In [36]:
print("\nDataFrame Columns:")
print(df.columns)


DataFrame Columns:
Index(['day', 'temp', 'Condition'], dtype='object')


In [37]:
df_dict = df.to_dict()
print(df_dict)


{'day': {0: 'Monday', 1: 'Tuesday', 2: 'Wednesday', 3: 'Thursday', 4: 'Friday', 5: 'Saturday', 6: 'Sunday'}, 'temp': {0: 12, 1: 14, 2: 12, 3: 18, 4: 20, 5: 22, 6: 24}, 'Condition': {0: 'Sunny', 1: 'Rain', 2: 'Rain', 3: 'Cloudy', 4: 'Sunny', 5: 'Sunny', 6: 'Sunny'}}


In [38]:
# Convert temperature column to list and show basic statistics
temp_list = df['temp'].to_list()
print(f"Number of temperature readings: {len(temp_list)}")
print(f"Temperature values: {temp_list}")
print(f"\nAverage temperature: {sum(temp_list)/len(temp_list):.2f}")

Number of temperature readings: 7
Temperature values: [12, 14, 12, 18, 20, 22, 24]

Average temperature: 17.43


In [40]:
# More DataFrame operations
print("DataFrame Info:")
print(df.info())

print("\nBasic Statistics:")
print(df.describe())

print("\nUnique values in 'Condition' column:")
print(df['Condition'].unique())

DataFrame Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7 entries, 0 to 6
Data columns (total 3 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   day        7 non-null      object
 1   temp       7 non-null      int64 
 2   Condition  7 non-null      object
dtypes: int64(1), object(2)
memory usage: 300.0+ bytes
None

Basic Statistics:
            temp
count   7.000000
mean   17.428571
std     4.859943
min    12.000000
25%    13.000000
50%    18.000000
75%    21.000000
max    24.000000

Unique values in 'Condition' column:
['Sunny' 'Rain' 'Cloudy']


In [41]:
print(max(df['temp']))

24


In [45]:
print(df[df.temp == df.temp.max()])

      day  temp Condition
6  Sunday    24     Sunny


In [47]:
monday = df[df['day'] == 'Monday']

In [52]:
monday_temp = int(monday.temp)
monday_temp_f = monday_temp * 9/5 + 32  # Convert Celsius to Fahrenheit
print(f"\nMonday's temperature readings: {monday_temp_f}°F")


Monday's temperature readings: 53.6°F


  monday_temp = int(monday.temp)


In [57]:
data_dict = {
    "teachers": ["Alice", "Bob", "Charlie"],
    "subjects": ["Math", "Science", "History"],
    "years_of_experience": [5, 10, 3],
    "is_full_time": [True, True, False]
}
df = pd.DataFrame(data_dict)
print("\nDataFrame created from dictionary:")
print(df)
df.to_csv("Data/teachers_data.csv", index=False)


DataFrame created from dictionary:
  teachers subjects  years_of_experience  is_full_time
0    Alice     Math                    5          True
1      Bob  Science                   10          True
2  Charlie  History                    3         False
