In [1]:
import pandas as pd
# A Series is a one-dimensional labelled array; this one holds the odd
# numbers 1..9 and carries the name "numbers".
s = pd.Series(data=range(1, 10, 2), name="numbers")
print(s)


0    1
1    3
2    5
3    7
4    9
Name: numbers, dtype: int64


In [2]:
# Column-oriented input: every key becomes a column, every list its values.
data = dict(
    Name=['Alice', 'Bob', 'Charlie'],
    Age=[25, 30, 35],
    City=['NYC', 'LA', 'Chicago'],
)
# Build the frame from the mapping (default orientation is by column).
df = pd.DataFrame.from_dict(data)
print(df)


      Name  Age     City
0    Alice   25      NYC
1      Bob   30       LA
2  Charlie   35  Chicago


In [None]:
# Load a DataFrame from a CSV file
df = pd.read_csv(filepath_or_buffer='data.csv')

# Save it back out as CSV; index=False leaves the row index out of the file
df.to_csv(path_or_buf='output.csv', index=False)


In [None]:
# Load one worksheet of an Excel workbook (requires openpyxl or xlrd)
df = pd.read_excel(io='data.xlsx', sheet_name='Sheet1')

# Save the frame as an Excel workbook without the row index
df.to_excel(excel_writer='output.xlsx', index=False)


In [None]:
# Load a DataFrame from a JSON file
df = pd.read_json(path_or_buf='data.json')

# Save as JSON; orient='records' emits a list with one object per row
df.to_json(path_or_buf='output.json', orient='records')


In [None]:
import sqlite3

# Open the SQLite database file.
conn = sqlite3.connect('database.db')
try:
    # Read from SQL: run the query and load the result set into a DataFrame
    df = pd.read_sql_query('SELECT * FROM employees', conn)

    # Write to SQL: if_exists='replace' drops and recreates the table,
    # index=False keeps the row index out of the table.
    df.to_sql('employees', conn, if_exists='replace', index=False)
finally:
    # Always release the connection, even when the query or the write raises;
    # otherwise the handle stays open for the lifetime of the kernel.
    conn.close()


In [None]:
# Rebuild the small sample frame so this cell is self-contained: the
# file-reading cells above reassign `df`, and the steps below need exactly
# these three rows and the Name / Age / City columns. Rebuilding also makes
# the cell safe to re-run (the rename and drop below are otherwise one-shot).
df = pd.DataFrame({
    'Name': ['Alice', 'Bob', 'Charlie'],
    'Age': [25, 30, 35],
    'City': ['NYC', 'LA', 'Chicago']
})

# Add new columns
# Salary is stored as float because the 10% raise below produces fractional
# values; starting from integers would force an int -> float upcast on update.
df['Salary'] = [50000.0, 60000.0, 70000.0]
df['Bonus'] = df['Salary'] * 0.1  # Calculated column
df['JoinDate'] = ['2020-01-15', '2019-06-01', '2021-03-20']  # text for now, parsed below

# Update values
df['Age'] = df['Age'] + 1
df.loc[df['City'] == 'NYC', 'Salary'] *= 1.1  # raise only for the NYC rows

# Rename columns (reassign instead of inplace=True)
df = df.rename(columns={'Name': 'FullName', 'Age': 'Years'})

# Drop columns
df = df.drop(columns=['City'])

# Change data types
# The age column is called 'Years' after the rename above; df['Age'] would
# raise KeyError here.
df['Years'] = df['Years'].astype('int32')
df['JoinDate'] = pd.to_datetime(df['JoinDate'])


In [4]:
# Row-wise stacking: both inputs share the columns A and B, and
# ignore_index=True renumbers the rows of the combined frame from 0.
df1 = pd.DataFrame(dict(A=[1, 2], B=[3, 4]))
df2 = pd.DataFrame(dict(A=[5, 6], B=[7, 8]))
result = pd.concat([df1, df2], axis='index', ignore_index=True)

# Column-wise stacking: inputs are lined up on their row index, so C and D
# are placed next to A and B. This reassigns `result`.
df3 = pd.DataFrame(dict(C=[9, 10], D=[11, 12]))
result = pd.concat([df1, df3], axis='columns')
print(result)


   A  B   C   D
0  1  3   9  11
1  2  4  10  12


In [5]:
# Two small example tables that share the DeptID key.

# One row per employee: (ID, Name, DeptID)
employees = pd.DataFrame(
    [
        [1, 'Alice', 101],
        [2, 'Bob', 102],
        [3, 'Charlie', 101],
    ],
    columns=['ID', 'Name', 'DeptID'],
)

# One row per department: (DeptID, DeptName)
departments = pd.DataFrame(
    [
        [101, 'HR'],
        [102, 'Engineering'],
    ],
    columns=['DeptID', 'DeptName'],
)


In [None]:
# Inner Join (default): keep only rows whose DeptID appears in both frames
merged = pd.merge(employees, departments, on='DeptID')

# Left Join: keep every employee row
merged_left = pd.merge(employees, departments, on='DeptID', how='left')

# Right Join: keep every department row
merged_right = pd.merge(employees, departments, on='DeptID', how='right')

# Outer Join: keep the rows of both frames
merged_outer = pd.merge(employees, departments, on='DeptID', how='outer')

# Merge on different column names
# The key is renamed on one side so that left_on / right_on have something
# real to match: employees.DeptID is joined against DepartmentID. Both key
# columns are kept in the result.
departments_renamed = departments.rename(columns={'DeptID': 'DepartmentID'})
merged_custom = pd.merge(
    employees,
    departments_renamed,
    left_on='DeptID',
    right_on='DepartmentID',
)


In [None]:
# Load data
employees = pd.read_csv('employees.csv')

# Basic inspection
# info() writes its report to stdout itself and returns None, so it is not
# wrapped in print() (that would add a stray "None" line to the output).
employees.info()
print(employees.describe())

# Handle missing values: fill gaps in Salary with the column median.
# The result is assigned back to the column rather than calling
# fillna(inplace=True) on employees['Salary']: that form operates on an
# intermediate Series and is not guaranteed to update `employees`.
employees['Salary'] = employees['Salary'].fillna(employees['Salary'].median())

# Remove duplicates: keep the first row seen for each EmployeeID
employees = employees.drop_duplicates(subset=['EmployeeID'])

# Add calculated column
employees['Bonus'] = employees['Salary'] * 0.15

# Group analysis: per-department salary mean / median / count and mean age
dept_stats = employees.groupby('Department').agg({
    'Salary': ['mean', 'median', 'count'],
    'Age': 'mean'
})


In [None]:
# Inputs: two quarterly sales extracts (CSV) and the product catalogue (Excel)
sales_q1 = pd.read_csv('sales_q1.csv')
sales_q2 = pd.read_csv('sales_q2.csv')
products = pd.read_excel('products.xlsx')

# Stack the two quarters into a single table with a fresh 0..n-1 index
all_sales = pd.concat(objs=[sales_q1, sales_q2], ignore_index=True)

# Attach product attributes to each sale (inner join on ProductID)
sales_with_products = all_sales.merge(products, on='ProductID')

# Revenue per row = Quantity * Price
sales_with_products = sales_with_products.assign(
    Revenue=sales_with_products['Quantity'] * sales_with_products['Price']
)

# Total revenue per product category
category_revenue = sales_with_products.groupby(by='Category')['Revenue'].sum()


In [None]:
# NOTE(review): this cell repeats the same sales pipeline as the cell directly
# before it; one of the two can probably be removed.

# Read both quarterly sales files, then the product catalogue
sales_q1, sales_q2 = (pd.read_csv(path) for path in ('sales_q1.csv', 'sales_q2.csv'))
products = pd.read_excel('products.xlsx')

# Combine the quarters row-wise, renumbering the index
all_sales = pd.concat((sales_q1, sales_q2), ignore_index=True)

# Join product details onto the sales rows via ProductID
sales_with_products = pd.merge(left=all_sales, right=products, on='ProductID')

# Revenue column: quantity sold times unit price
sales_with_products['Revenue'] = sales_with_products['Quantity'].mul(
    sales_with_products['Price']
)

# Sum revenue within each category
revenue_by_category = sales_with_products.groupby('Category')['Revenue']
category_revenue = revenue_by_category.sum()


In [None]:
# Read the raw customer records
customers = pd.read_json('customers.json')

# Cleaning pipeline
# Step 1 - text normalisation: title-case names, lower-case e-mail addresses
customers = customers.assign(
    Name=customers['Name'].str.title(),
    Email=customers['Email'].str.lower(),
)

# Step 2 - de-duplication: keep the first row per (Email, Phone) pair.
# Runs after step 1 so that e-mails differing only in case collapse together.
customers = customers.drop_duplicates(subset=['Email', 'Phone'])

# Step 3 - dtypes: parse dates; non-numeric ages become NaN (errors='coerce')
customers = customers.assign(
    SignupDate=pd.to_datetime(customers['SignupDate']),
    Age=pd.to_numeric(customers['Age'], errors='coerce'),
)

# Step 4 - outliers: keep ages inside the 1.5 * IQR fences (both ends
# inclusive). Rows whose Age is NaN fail the check and are dropped as well.
q1 = customers['Age'].quantile(0.25)
q3 = customers['Age'].quantile(0.75)
iqr = q3 - q1
age_in_range = customers['Age'].between(q1 - 1.5*iqr, q3 + 1.5*iqr)
customers = customers[age_in_range]

# Persist the cleaned table without the row index
customers.to_csv('cleaned_customers.csv', index=False)
