In [None]:
import pandas as pd
import numpy as np
import random

# Build a synthetic purchases DataFrame used by the exercises below.
#
# All columns are drawn from one dedicated, seeded NumPy Generator so the
# dataset (and the CSV exported from it) is identical on every run, and so
# NumPy's / the stdlib's global random state is left untouched.
num_rows = 10000
rng = np.random.default_rng(42)

# Value pools the categorical and date columns are sampled from.
products = ['Laptop', 'Smartphone', 'Tablet', 'Headphones', 'Charger']
cities = ['New York', 'London', 'Tokyo', 'Paris', 'Sydney', 'Mumbai', 'Berlin', 'Toronto']
purchase_days = pd.date_range('2020-01-01', '2023-12-31').to_numpy()  # one entry per calendar day

data = {
    # Upper bounds of integers() are exclusive: IDs are 1000..4999.
    'CustomerID': rng.integers(1000, 5000, num_rows),
    'ProductName': rng.choice(products, num_rows),
    'PurchaseDate': pd.to_datetime(rng.choice(purchase_days, num_rows)),
    # Quantities are 1..9 (upper bound exclusive).
    'Quantity': rng.integers(1, 10, num_rows),
    # Prices are rounded to two decimals.
    'UnitPrice': rng.uniform(50, 2000, num_rows).round(2),
    'City': rng.choice(cities, num_rows),
    'Rating': rng.choice([1, 2, 3, 4, 5], num_rows),
    'IsMember': rng.choice([True, False], num_rows),
}

df = pd.DataFrame(data)

# 50 Pandas Questions

# 1. Basic DataFrame Operations:

# 1. Display the first 10 rows of the DataFrame.
# 2. Display the last 5 rows of the DataFrame.
# 3. Get the shape (number of rows and columns) of the DataFrame.
# 4. Get the data types of each column.
# 5. Get summary statistics (mean, median, etc.) of numeric columns.
# 6. Display the unique values in the 'ProductName' column.
# 7. Count the number of unique customers.
# 8. Display the column names of the DataFrame.
# 9. Check if there are any missing values in the DataFrame.
# 10. Get the total number of missing values in each column.

# 2. Selection and Filtering:

# 11. Select the 'CustomerID' and 'ProductName' columns.
# 12. Filter the DataFrame to show only purchases made in 'New York'.
# 13. Filter the DataFrame to show purchases with a 'Quantity' greater than 5.
# 14. Filter the DataFrame to show purchases made after '2023-01-01'.
# 15. Filter the DataFrame to show purchases of 'Laptop' with a 'Rating' of 5.
# 16. Select rows where 'IsMember' is True.
# 17. Select rows where 'UnitPrice' is between 500 and 1000.
# 18. Select rows where 'City' is either 'London' or 'Paris'.
# 19. Select rows where 'Rating' is less than 3.
# 20. Select all rows where the 'ProductName' is 'Smartphone' and 'Quantity' is greater than 2.

# 3. Data Manipulation:

# 21. Create a new column 'TotalPrice' by multiplying 'Quantity' and 'UnitPrice'.
# 22. Ensure the 'PurchaseDate' column has a datetime dtype (it is read back as plain strings if the data is reloaded from the CSV).
# 23. Group the DataFrame by 'CustomerID' and calculate the total 'TotalPrice' for each customer.
# 24. Group the DataFrame by 'ProductName' and calculate the average 'Rating'.
# 25. Sort the DataFrame by 'PurchaseDate' in descending order.
# 26. Rename the 'UnitPrice' column to 'Price'.
# 27. Drop the 'IsMember' column (work on a copy: question 37 still needs 'IsMember').
# 28. Create a 'Year' column from the 'PurchaseDate' column.
# 29. Create a 'Month' column from the 'PurchaseDate' column.
# 30. Apply a function to round the 'TotalPrice' to the nearest whole number.

# 4. Aggregation and Grouping:

# 31. Calculate the total 'Quantity' sold for each 'ProductName'.
# 32. Calculate the average 'Price' for each 'City'.
# 33. Find the maximum 'TotalPrice' for each 'CustomerID'.
# 34. Find the minimum 'Rating' for each 'ProductName'.
# 35. Count the number of purchases made in each 'City'.
# 36. Calculate the total revenue (sum of 'TotalPrice') for each 'Year'.
# 37. Calculate the average 'Rating' for members and non-members separately.
# 38. Calculate the total 'Quantity' sold for each 'ProductName' and 'City' combination.
# 39. Calculate the standard deviation of 'Price' for each 'ProductName'.
# 40. Find the customer who made the highest total purchase ('TotalPrice').

# 5. Time Series and Advanced Operations:

# 41. Find the number of purchases made each month.
# 42. Calculate the cumulative sum of 'TotalPrice' sorted by 'PurchaseDate'.
# 43. Calculate the rolling average of 'Price' over a 7-day window.
# 44. Resample the DataFrame by month and calculate the average 'TotalPrice'.
# 45. Find the percentage change in 'TotalPrice' from the previous row.
# 46. Pivot the DataFrame to show 'Quantity' sold for each 'ProductName' in each 'City'.
# 47. Melt the pivoted DataFrame from question 46 to turn its per-city columns back into rows.
# 48. Apply a custom function to categorize 'TotalPrice' into 'Low', 'Medium', and 'High' based on quantiles.
# 49. Find the top 3 customers with the highest average purchase value.
# 50. Create a new DataFrame showing the percentage of total sales for each product.

In [3]:
# Persist the generated dataset for the exercises. index=False keeps the
# auto-generated row index out of the file, so reloading it with
# pd.read_csv does not produce a spurious 'Unnamed: 0' column.
df.to_csv('DFQuestion50.csv', index=False)