**Title**: Data Wrangling 6.2 Exercises  
**Author**: Ryan Weeks  
**Date**: 1/19/2025  
**Description**: In these exercises, I manipulated and analyzed time series data from both financial and seismic datasets. Some operations performed included aggregation, transformation, and querying. These exercises also focused on uncovering trends and insights through data normalization and advanced analysis methods.

In [34]:
import numpy as np
import pandas as pd

# Load the two exercise datasets.
# NOTE: these are absolute, machine-specific paths; adjust them when running elsewhere.
earthquakes = pd.read_csv(r"C:\Users\Weekseey\Documents\Bellevue Work\Data Wrangling\earthquakes.csv")

# The FAANG prices are a time series: use the 'date' column as a parsed
# DatetimeIndex right at load time. The later cells (resample, rolling('60D'),
# .loc['2018-Q4']) all require a DatetimeIndex named 'date'; with the default
# RangeIndex they fail or produce meaningless 1970 timestamps on a fresh kernel.
faang = pd.read_csv(
    r"C:\Users\Weekseey\Documents\Bellevue Work\Data Wrangling\faang.csv",
    index_col="date",
    parse_dates=True,
)

# 1.

In [4]:
# Earthquakes whose parsed place is Japan, with magType 'mb' and a magnitude of at least 4.9.
japan_mb_filter = "parsed_place == 'Japan' and magType == 'mb' and mag >= 4.9"
earthquakes.query(japan_mb_filter)

Unnamed: 0,mag,magType,time,place,tsunami,parsed_place
1563,4.9,mb,1538977532250,"293km ESE of Iwo Jima, Japan",0,Japan
2576,5.4,mb,1538697528010,"37km E of Tomakomai, Japan",0,Japan
3072,4.9,mb,1538579732490,"15km ENE of Hasaki, Japan",0,Japan
3632,4.9,mb,1538450871260,"53km ESE of Hitachi, Japan",0,Japan


# 2.

In [7]:
# Keep only the rows with magType 'ml', bucket their magnitudes into
# unit-wide bins (0, 1], (1, 2], ..., (8, 9], and count the rows per bin.
ml_quakes = earthquakes.query("magType == 'ml'")
mag_bin = pd.cut(ml_quakes.mag, np.arange(0, 10)).rename("mag_bin")
mag_bin.value_counts()

mag_bin
(1, 2]    3105
(0, 1]    2207
(2, 3]     862
(3, 4]     122
(4, 5]       2
(5, 6]       1
(6, 7]       0
(7, 8]       0
(8, 9]       0
Name: count, dtype: int64

# 3.

In [60]:
# Resampling and time-based rolling windows need a DatetimeIndex.
# When 'date' is still an ordinary column (i.e. the frame has its default
# integer index), move it into the index first; converting the integer index
# itself would silently yield 1970-epoch timestamps instead of the real dates.
if "date" in faang.columns:
    faang = faang.set_index("date")

# Unparseable values become NaT because of errors="coerce".
faang.index = pd.to_datetime(faang.index, errors = "coerce")

In [71]:
# Widen the text display first so the printed frame is not truncated.
pd.set_option('display.width', 1000)
pd.set_option('display.max_columns', None)

# How each price/volume column is summarised within a month.
monthly_aggregations = {
    'open': 'mean',
    'close': 'mean',
    'high': 'max',
    'low': 'min',
    'volume': 'sum'
}

# Month-end resample per ticker, then flatten the (ticker, date) index
# back into regular columns.
monthly_data = (
    faang
    .groupby("ticker")
    .resample("ME")
    .agg(monthly_aggregations)
    .reset_index()
)

print(monthly_data)

   ticker       date         open        close         high          low        volume
0    AAPL 2018-01-31    43.505357    43.501309    45.025002    41.174999  2.638718e+09
1    AAPL 2018-02-28    41.819079    41.909737    45.154999    37.560001  3.711577e+09
2    AAPL 2018-03-31    43.761786    43.624048    45.875000    41.235001  2.854911e+09
3    AAPL 2018-04-30    42.441310    42.458572    44.735001    40.157501  2.664617e+09
4    AAPL 2018-05-31    46.239091    46.384205    47.592499    41.317501  2.483905e+09
5    AAPL 2018-06-30    47.180119    47.155357    48.549999    45.182499  2.110498e+09
6    AAPL 2018-07-31    47.549048    47.577857    48.990002    45.855000  1.574766e+09
7    AAPL 2018-08-31    53.121739    53.336522    57.217499    49.327499  2.801276e+09
8    AAPL 2018-09-30    55.582763    55.518421    57.417500    53.825001  2.715888e+09
9    AAPL 2018-10-31    55.300000    55.211413    58.367500    51.522499  3.158994e+09
10   AAPL 2018-11-30    47.954881    47.808

# 4.

In [73]:
# Maximum magnitude for every (tsunami flag, magType) combination;
# combinations with no rows show up as NaN.
crosstab = pd.crosstab(
    index=earthquakes["tsunami"],
    columns=earthquakes["magType"],
    values=earthquakes["mag"],
    aggfunc='max',
)
print(crosstab)

magType   mb  mb_lg    md   mh   ml  ms_20    mw  mwb  mwr  mww
tsunami                                                        
0        5.6    3.5  4.11  1.1  4.2    NaN  3.83  5.8  4.8  6.0
1        6.1    NaN   NaN  NaN  5.1    5.7  4.41  NaN  NaN  7.5


# 5.

In [76]:
# Per-ticker aggregations over a trailing 60-day time window
# (the window is time-based, so it relies on the datetime index).
rolling_aggregations = {
    'open': 'mean',
    'close': 'mean',
    'high': 'max',
    'low': 'min',
    'volume': 'sum'
}
rolling_by_ticker = faang.groupby('ticker').rolling('60D')
rolling_by_ticker.agg(rolling_aggregations)

Unnamed: 0_level_0,Unnamed: 1_level_0,open,close,high,low,volume
ticker,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
AAPL,2018-01-02,42.540001,43.064999,43.075001,42.314999,102223600.0
AAPL,2018-01-03,42.836250,43.061249,43.637501,42.314999,220295200.0
AAPL,2018-01-04,42.935833,43.126666,43.637501,42.314999,310033600.0
AAPL,2018-01-05,43.041875,43.282499,43.842499,42.314999,404673600.0
AAPL,2018-01-08,43.151000,43.343500,43.902500,42.314999,486944800.0
...,...,...,...,...,...,...
NFLX,2018-12-24,283.509251,281.931750,332.049988,233.679993,525657600.0
NFLX,2018-12-26,281.844501,280.777750,332.049988,231.229996,520444300.0
NFLX,2018-12-27,281.070489,280.162927,332.049988,231.229996,532679500.0
NFLX,2018-12-28,279.916342,279.461464,332.049988,231.229996,521973500.0


# 6.

In [79]:
# One row per ticker holding the mean of each value column
# ('mean' is pivot_table's default aggregation, spelled out here).
pd.pivot_table(faang, index='ticker', aggfunc='mean')

Unnamed: 0_level_0,close,high,low,open,volume
ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AAPL,47.263357,47.748526,46.795877,47.277859,136080300.0
AMZN,1641.726176,1662.839839,1619.840519,1644.072709,5648994.0
FB,171.510956,173.613347,169.303148,171.472948,27658600.0
GOOG,1113.225134,1125.777606,1101.001658,1113.554101,1741965.0
NFLX,319.290319,325.219322,313.18733,319.620558,11469620.0


# 7.

In [106]:
def z_score(column):
    """Standardise a column: subtract its mean, then divide by its standard deviation."""
    return (column - column.mean()) / column.std()


# Restrict to Amazon's rows in Q4 2018; 'ticker' is dropped so that only
# the numeric columns are standardised.
amzn_q4 = faang.loc['2018-Q4'].query("ticker == 'AMZN'").drop(columns='ticker')
amzn_q4.apply(z_score).head()

Unnamed: 0_level_0,high,low,open,close,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018-10-01,2.368006,2.502113,2.337813,2.385848,-1.630411
2018-10-02,2.227302,2.247433,2.190795,2.155037,-0.861879
2018-10-03,2.058955,2.139987,2.06857,2.025489,-0.920345
2018-10-04,1.819474,1.781561,1.850048,1.722816,-0.126582
2018-10-05,1.628173,1.554416,1.642819,1.584748,-0.298771


# 8.

In [125]:
# The three Facebook events: dates and descriptions are listed in matching order.
event_dates = pd.to_datetime(['2018-07-25', '2018-03-19', '2018-03-20'])
event_descriptions = [
    'Disappointing user growth announced after close.',
    'Cambridge Analytica story',
    'FTC investigation',
]

# Index the events by (date, ticker) so they line up with the price data in the join.
event_data = pd.DataFrame({
    'ticker': 'FB',
    'date': event_dates,
    'event': event_descriptions,
}).set_index(['date', 'ticker'])

# Re-index the prices the same way, then outer-join so every price row is
# kept and the 'event' column is filled only where an event matches.
faang_by_date_ticker = faang.reset_index().set_index(['date', 'ticker'])
faang_by_date_ticker.join(event_data, how = 'outer')

Unnamed: 0_level_0,Unnamed: 1_level_0,high,low,open,close,volume,event
date,ticker,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2018-01-02,AAPL,43.075001,42.314999,42.540001,43.064999,102223600.0,
2018-01-02,AMZN,1190.000000,1170.510010,1172.000000,1189.010010,2694500.0,
2018-01-02,FB,181.580002,177.550003,177.679993,181.419998,18151900.0,
2018-01-02,GOOG,1066.939941,1045.229980,1048.339966,1065.000000,1237600.0,
2018-01-02,NFLX,201.649994,195.419998,196.100006,201.070007,10966900.0,
...,...,...,...,...,...,...,...
2018-12-31,AAPL,39.840000,39.119999,39.632500,39.435001,140014000.0,
2018-12-31,AMZN,1520.760010,1487.000000,1510.800049,1501.969971,6954500.0,
2018-12-31,FB,134.639999,129.949997,134.449997,131.089996,24625300.0,
2018-12-31,GOOG,1052.699951,1023.590027,1050.959961,1035.609985,1493300.0,


# 9.

In [143]:
# Re-index the prices by (ticker, date). Note that this rebinds `faang` itself.
faang = faang.reset_index().set_index(['ticker', 'date'])

# transform('first') broadcasts each ticker's first value of every column
# across all of that ticker's rows, keeping the original shape...
first_values = faang.groupby(level='ticker').transform('first')

# ...so dividing by it expresses every value relative to the ticker's first date
# (which therefore becomes 1.0).
faang_index = faang.div(first_values)

# Preview the first five rows of each ticker.
faang_index.groupby(level='ticker').head(5)

Unnamed: 0_level_0,Unnamed: 1_level_0,high,low,open,close,volume
ticker,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
AAPL,2018-01-02,1.0,1.0,1.0,1.0,1.0
AAPL,2018-01-03,1.013059,1.015952,1.013928,0.999826,1.155033
AAPL,2018-01-04,1.00679,1.016661,1.013987,1.00447,0.877864
AAPL,2018-01-05,1.017818,1.022392,1.019276,1.015906,0.925814
AAPL,2018-01-08,1.019211,1.027591,1.024624,1.012133,0.804816
AMZN,2018-01-02,1.0,1.0,1.0,1.0,1.0
AMZN,2018-01-03,1.013017,1.015199,1.013908,1.012775,1.153758
AMZN,2018-01-04,1.021739,1.029175,1.028157,1.017308,1.121581
AMZN,2018-01-05,1.032891,1.033737,1.038831,1.033751,1.315532
AMZN,2018-01-08,1.053008,1.052558,1.054608,1.048662,1.588235
