In [1]:
class SparseVector:
    """Integer vector that records the positions of its non-zero entries.

    Attributes:
        nums: the original list of values, stored as given (not copied).
        indices: ascending positions ``i`` for which ``nums[i] != 0``.
    """

    def __init__(self, nums: list[int]):
        """Store ``nums`` and pre-compute the positions of its non-zero values."""
        self.nums = nums
        self.indices = [pos for pos, value in enumerate(nums) if value != 0]

    def dot_product(self, vec: 'SparseVector') -> int:
        """Return the dot product of this vector with ``vec``.

        Walks both sorted index lists with two cursors, so only positions that
        are non-zero in *both* vectors contribute to the sum.
        """
        mine, theirs = self.indices, vec.indices
        total = 0
        a = b = 0
        while a < len(mine) and b < len(theirs):
            left, right = mine[a], theirs[b]
            if left < right:
                # Position is non-zero only in this vector; skip it.
                a += 1
            elif left > right:
                # Position is non-zero only in the other vector; skip it.
                b += 1
            else:
                # Shared non-zero position: accumulate the product.
                total += self.nums[left] * vec.nums[right]
                a += 1
                b += 1
        return total

In [2]:
# Two example vectors. Index 3 is the only position that is non-zero in both,
# so the dot product below is 40 * 2 = 80.
nums1 = [0, 0, 3, 40, 0, 0, 0, 5, 0]
nums2 = [1, 0, 0, 2, 0, 14, 0, 0, 0]
vec1, vec2 = SparseVector(nums1), SparseVector(nums2)
result = vec1.dot_product(vec2)


In [3]:
# Display the dot product computed above.
result

80

In [4]:
import pandas as pd
import numpy as np

In [7]:
# Sample transaction log: one row per transaction, keyed by customer id and
# transaction date (dates are kept as plain strings here).
data = dict(
    customer_global_rk=[110, 110, 110, 111, 111, 111, 112, 112, 113, 113, 113],
    trans_dt=[
        "2023-01-01", "2023-01-01", "2023-01-03",
        "2023-01-04", "2023-01-05", "2023-01-08",
        "2023-01-01", "2023-01-02",
        "2023-01-09", "2023-01-10", "2023-01-12",
    ],
    trans_amt=[123, 235, 345, 654, 229, 345, 212, 334, 789, 200, 557],
)
df = pd.DataFrame(data)


Unnamed: 0,customer_global_rk,trans_dt,trans_amt
0,110,2023-01-01,123
1,110,2023-01-01,235
2,110,2023-01-03,345
3,111,2023-01-04,654
4,111,2023-01-05,229
5,111,2023-01-08,345
6,112,2023-01-01,212
7,112,2023-01-02,334
8,113,2023-01-09,789
9,113,2023-01-10,200


In [8]:
# Total amount per (customer, transaction date); the result is a Series with a
# two-level index, one level per grouping key.
a1 = (
    df.groupby(by=["customer_global_rk", "trans_dt"])["trans_amt"]
    .sum()
)

In [9]:
# Display the per-customer, per-date totals.
a1

customer_global_rk  trans_dt  
110                 2023-01-01    358
                    2023-01-03    345
111                 2023-01-04    654
                    2023-01-05    229
                    2023-01-08    345
112                 2023-01-01    212
                    2023-01-02    334
113                 2023-01-09    789
                    2023-01-10    200
                    2023-01-12    557
Name: trans_amt, dtype: int64

In [37]:
# Toy transaction log. Client 1 has two transactions on 2022-01-02, so that
# (client, date) pair has to be aggregated before pivoting.
data = dict(
    date=[
        '2022-01-01', '2022-01-02', '2022-01-03',
        '2022-01-01', '2022-01-02', '2022-01-03',
        '2022-01-02',
    ],
    client_id=[1, 1, 2, 2, 3, 3, 1],
    amount=[100, 200, 300, 400, 500, 600, 700],
)
df = pd.DataFrame(data)
df

Unnamed: 0,date,client_id,amount
0,2022-01-01,1,100
1,2022-01-02,1,200
2,2022-01-03,2,300
3,2022-01-01,2,400
4,2022-01-02,3,500
5,2022-01-03,3,600
6,2022-01-02,1,700


In [39]:
# Build a client x day matrix of transaction totals.
df['date'] = pd.to_datetime(df['date'])
min_date = df['date'].min()
max_date = df['date'].max()

# Collapse multiple transactions of one client on the same day into one total.
trans = df.groupby(['client_id', 'date'])['amount'].sum().reset_index()

# Rows are clients, columns are dates. After the groupby every (client, date)
# pair is unique, so 'sum' simply carries the daily total through.
trans_pivot = pd.pivot_table(
    trans, values='amount', index='client_id', columns='date', aggfunc='sum'
)

# Insert a column for every calendar day between min_date and max_date.
# Appending zeros to the end of each row would put them in the wrong place
# whenever a day in the middle of the range has no transactions at all:
# every later day would shift left. Reindexing keeps column k == min_date + k days.
trans_pivot = trans_pivot.reindex(
    columns=pd.date_range(min_date, max_date, freq='D', name='date')
)

# Days on which a client has no transactions become 0.
trans_pivot = trans_pivot.fillna(0.0)

# Transform the table to matrix (list of rows, one row per client).
matrix = trans_pivot.values.astype(int).tolist()
print(matrix)


[[100, 900, 0], [400, 0, 300], [0, 500, 600]]


In [40]:
# Wrap the nested-list matrix in a DataFrame. No index or column labels are
# passed, so pandas assigns default integer labels to rows and columns.
dbf = pd.DataFrame(matrix)

In [41]:
# Display the matrix as a DataFrame.
dbf

Unnamed: 0,0,1,2
0,100,900,0
1,400,0,300
2,0,500,600


In [1]:
import pandas as pd

In [6]:
import pandas as pd
from pandas_datareader import data as pdr
import yfinance as yf
# Define the futures contract name and timeframe
contract = 'GC=F'
#contract = 'UVXY'
start = '2023-01-01'
end = '2024-01-01'

# Download the data by calling yfinance directly.
# The previous `yf.pdr_override()` + `pdr.get_data_yahoo(...)` combination only
# re-routed pandas_datareader's function to `yf.download`, and `pdr_override`
# is deprecated in recent yfinance releases, so the indirection is dropped.
# The pandas_datareader import is no longer needed by this cell.
df = yf.download(contract, start=start, end=end)

# Keep only the relevant columns
df = df[['Open', 'High', 'Low', 'Close', 'Volume']]

# Output to CSV
#df.to_csv('gold_futures.csv')

[*********************100%***********************]  1 of 1 completed


In [11]:
# Preview the first five rows of the price frame.
# NOTE(review): the recorded output already contains a `diff` column that no
# visible earlier cell creates -- presumably left over from an out-of-order
# run; confirm with Restart Kernel -> Run All.
df.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,diff
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2023-01-03,1836.199951,1839.699951,1836.199951,1839.699951,29,
2023-01-04,1845.599976,1859.099976,1845.599976,1852.800049,25,1839.699951
2023-01-05,1855.199951,1855.199951,1834.800049,1834.800049,24,1852.800049
2023-01-06,1838.400024,1868.199951,1835.300049,1864.199951,26,1834.800049
2023-01-09,1867.0,1880.0,1867.0,1872.699951,62,1864.199951


In [16]:
# Row-over-row change of Close, expressed in percent. The first row has no
# predecessor and therefore stays NaN.
df["diff"] = df["Close"].pct_change().mul(100)

In [17]:
# Preview the first five rows, now with `diff` holding the percent change.
df.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,diff
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2023-01-03,1836.199951,1839.699951,1836.199951,1839.699951,29,
2023-01-04,1845.599976,1859.099976,1845.599976,1852.800049,25,0.712078
2023-01-05,1855.199951,1855.199951,1834.800049,1834.800049,24,-0.971503
2023-01-06,1838.400024,1868.199951,1835.300049,1864.199951,26,1.602349
2023-01-09,1867.0,1880.0,1867.0,1872.699951,62,0.45596


In [18]:
# Smallest value of the percent-change column (NaN is skipped by default).
df['diff'].min()

-2.7866212520694877

In [19]:
# Largest value of the percent-change column (NaN is skipped by default).
df['diff'].max()

3.108113950051017

In [24]:
# Source interval: the observed range of the percent-change column.
input_min = df['diff'].min()
input_max = df['diff'].max()

# Target interval for the rescaled values.
output_min = 1
output_max = 64

# Min-max rescale `diff` from [input_min, input_max] onto
# [output_min, output_max], then round to the nearest whole number.
# NaN inputs (e.g. the first row) stay NaN.
df['scaled'] = (
    df['diff']
    .sub(input_min)
    .div(input_max - input_min)
    .mul(output_max - output_min)
    .add(output_min)
    .round()
)


In [25]:
# Preview the first five rows, now including the `scaled` column.
df.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,diff,scaled
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2023-01-03,1836.199951,1839.699951,1836.199951,1839.699951,29,,
2023-01-04,1845.599976,1859.099976,1845.599976,1852.800049,25,0.712078,38.0
2023-01-05,1855.199951,1855.199951,1834.800049,1834.800049,24,-0.971503,20.0
2023-01-06,1838.400024,1868.199951,1835.300049,1864.199951,26,1.602349,48.0
2023-01-09,1867.0,1880.0,1867.0,1872.699951,62,0.45596,36.0


In [27]:
# Prepare timeseries for ML 

import pandas as pd
import numpy as np

# Generate sample timeseries.
# Seed NumPy's global RNG first so the random series -- and every window
# derived from it -- is identical after a kernel restart.
SEED = 42
np.random.seed(SEED)

N = 1000
series = np.random.randint(0, 10, size=N)

num_steps = 64   # length of each input window
num_labels = 5   # number of values right after the window used as its label

data = []
labels = []

# Slide over the series one position at a time. The upper bound guarantees
# that the last window still has `num_labels` values following it.
for i in range(len(series) - num_steps - num_labels + 1):
    vec = series[i:i + num_steps]
    label = series[i + num_steps:i + num_steps + num_labels]

    # Store plain lists so each DataFrame cell holds one whole window.
    data.append(list(vec))
    labels.append(list(label))

# Create the dataframe in one step: one row per window.
df = pd.DataFrame({'vec': data, 'label': labels})

print(df.head())


                                                 vec            label
0  [0, 4, 2, 6, 5, 4, 9, 1, 9, 2, 8, 4, 2, 7, 3, ...  [4, 1, 3, 0, 9]
1  [4, 2, 6, 5, 4, 9, 1, 9, 2, 8, 4, 2, 7, 3, 7, ...  [1, 3, 0, 9, 2]
2  [2, 6, 5, 4, 9, 1, 9, 2, 8, 4, 2, 7, 3, 7, 2, ...  [3, 0, 9, 2, 9]
3  [6, 5, 4, 9, 1, 9, 2, 8, 4, 2, 7, 3, 7, 2, 0, ...  [0, 9, 2, 9, 0]
4  [5, 4, 9, 1, 9, 2, 8, 4, 2, 7, 3, 7, 2, 0, 1, ...  [9, 2, 9, 0, 6]
