In [49]:
### Covered in this section are the following concepts:
# Column/Row Operations
# Sorting and Ranking
# Aggregation and Grouping

### Library Imports
import pandas as pd
import yfinance as yf

In [50]:
### Create a Function to Populate the DataFrame
def get_data(tickers, start_date, end_date):
    # Create an empty DataFrame
    stock_data = pd.DataFrame()
    # Populate the DataFrame with adjusted close prices for each ticker
    for ticker in tickers:
        # Download data for the current ticker
        stock_data[ticker] = yf.download(ticker, start = start_date, end = end_date)['Adj Close']
        # Reset index
        stock_data.reset_index()
    
    return stock_data

# Function Call
tickers = ['GOOG', 'AAPL', 'NVDA']
start_date = '2024-01-01'
end_date = '2024-08-01'
stock_data = get_data(tickers, start_date, end_date)

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


#### Column/Row Operations
- `DataFrame.apply()` - Apply a function along an axis of the DataFrame, useful for functions applied to rows or columns.
- `DataFrame.assign()` - Add new columns to a DataFrame, returning a new object with all original and new columns.
- `DataFrame.drop()` - Remove specified labels from rows or columns.
- `Series.map()` - Apply a mapping or function to the elements of the Series.
- `DataFrame.drop_duplicates()` - Remove duplicate rows, optionally considering specific columns.
- `DataFrame.rename()` - Alter axes labels.
- `DataFrame.reset_index()` - Reset the index of the DataFrame, and use the default one instead.


In [51]:
### Column and Row Operations

# Drop the NVDA col
new_df = stock_data.drop(columns = ['NVDA'])
new_df.tail()

# Drop duplicates in full df
new_df.drop_duplicates()

# Rename AAPL column to XYZ
new_df = new_df.rename(columns = {'AAPL': 'XYZ'})

# Reset index 
new_df.reset_index(inplace = True)

new_df.tail()

Unnamed: 0,Date,GOOG,XYZ
141,2024-04-10,157.480804,167.359024
142,2024-04-18,157.281021,166.62088
143,2024-04-23,159.73822,166.481216
144,2024-04-22,157.770462,165.423874
145,2024-04-19,155.542999,164.585999


#### Sorting and Ranking
- `DataFrame.sort_values()` - Sort DataFrame by the values along either axis.
- `DataFrame.sort_index()` - Sort DataFrame by its index.
- `DataFrame.rank()` - Assign ranks to entries in the DataFrame, with options for different ranking methods.
- `DataFrame.nsmallest()` - Return the first `n` rows with the smallest values in specified columns.
- `DataFrame.nlargest()` - Return the first `n` rows with the largest values in specified columns.



In [60]:
### Sorting and Ranking

# Sort GOOG column into descending order
sort_vals = stock_data.sort_values(by = 'GOOG', ascending = False, inplace = True)

# Sort index into descending
sorted_index = stock_data.sort_index(ascending = False, inplace = True)

# Get a df with the 5 largest values in GOOG col
top5 = stock_data.nlargest(5, 'GOOG')


#### Aggregation and Grouping
- `DataFrame.groupby()` - Group DataFrame using a mapper or by a series of columns, applying a function to each group independently.
- `DataFrame.pivot_table()` - Create a spreadsheet-style pivot table as a DataFrame.
- `DataFrame.agg()` - Aggregate data using one or more operations over specified axis.
- `pd.concat()` - Concatenate two or more DataFrames along a particular axis.
- `pd.merge()` - Merge DataFrame objects by performing a database-style join operation by columns or indexes.


In [79]:
### Aggregation and Grouping
df1 = pd.DataFrame({
    'Item': ['Laptop', 'Tablet', 'Laptop', 'Phone', 'Tablet', 'Phone', 'Laptop', 'Tablet'],
    'Date_Bought': ['2023-01-01', '2023-01-03', '2023-01-05', '2023-01-07', '2023-01-09', '2023-01-11', '2023-01-13', '2023-01-15'],
    'Date_Sold': ['2023-01-10', '2023-01-12', '2023-01-15', '2023-01-18', '2023-01-20', '2023-01-22', '2023-01-25', '2023-01-28'],
    'Amount': [1000, 600, 1200, 800, 650, 850, 1300, 700],
    'Profit': [200, 100, 250, 150, 120, 180, 300, 130]
})
df2 = pd.DataFrame({
    'Item': ['Laptop', 'Tablet', 'Phone', 'Laptop', 'Tablet', 'Phone'],
    'Store': ['Store_A', 'Store_B', 'Store_C', 'Store_A', 'Store_B', 'Store_C'],
    'Region': ['North', 'South', 'East', 'North', 'South', 'East']
})
# Group the sales data by Item and calculate the total Amount and Profit for each item
grouped_sales = df1.groupby('Item').sum()
print(grouped_sales[['Amount', 'Profit']])

# Aggregate the sales data to calculate the total Amount and the average Profit across all sales
aggregated_data = df1.agg({'Amount': 'sum', 'Profit': 'mean'})
print(aggregated_data)

# Aggregate the sales data to find the minimum and maximum Amount sold for each Item."
aggregated_item_data = df1.groupby('Item').agg({'Amount': ['min', 'max']})
print(aggregated_item_data)

# Concatenate the sales DataFrame with the store information DataFrame along the rows
concatenated_data = pd.concat([df1, df2], axis = 0, ignore_index = True)
#print(concatenated_data)

# Merge the sales DataFrame with the store information DataFrame on the Item column
merged_data = pd.merge(df1, df2, on = 'Item')

# Merge the sales DataFrame with the store information DataFrame to include only sales that occurred in the North region
north_sales = pd.merge(df1, df2[df2['Region'] == 'North'], on = 'Item')
print(north_sales)


        Amount  Profit
Item                  
Laptop    3500     750
Phone     1650     330
Tablet    1950     350
Amount    7100.00
Profit     178.75
dtype: float64
       Amount      
          min   max
Item               
Laptop   1000  1300
Phone     800   850
Tablet    600   700
     Item Date_Bought   Date_Sold  Amount  Profit    Store Region
0  Laptop  2023-01-01  2023-01-10    1000     200  Store_A  North
1  Laptop  2023-01-01  2023-01-10    1000     200  Store_A  North
2  Laptop  2023-01-05  2023-01-15    1200     250  Store_A  North
3  Laptop  2023-01-05  2023-01-15    1200     250  Store_A  North
4  Laptop  2023-01-13  2023-01-25    1300     300  Store_A  North
5  Laptop  2023-01-13  2023-01-25    1300     300  Store_A  North
