In [48]:
import pandas as pd
import numpy as np

In [49]:
data = pd.read_csv('spending.csv', header = None)
data.head(10)

Unnamed: 0,0
0,2/11
1,68330 Feb rent
2,770 charge
3,68330 March rent
4,770 charge
5,880 lunch torisho
6,3580 plants rack amazon
7,7980 air purifier amazon
8,2/12
9,750 McDonald’s


In [50]:
data.columns = data.iloc[0]
data

Unnamed: 0,2/11
0,2/11
1,68330 Feb rent
2,770 charge
3,68330 March rent
4,770 charge
...,...
598,"4000, social dinner, tres hermanos"
599,"600, social snacks, what the dickens"
600,"700, taxi, to Ebisu"
601,7/27


In [51]:
data.rename(columns={'2/11': 'item'}, inplace=True)
data

Unnamed: 0,item
0,2/11
1,68330 Feb rent
2,770 charge
3,68330 March rent
4,770 charge
...,...
598,"4000, social dinner, tres hermanos"
599,"600, social snacks, what the dickens"
600,"700, taxi, to Ebisu"
601,7/27


In [52]:
# check data

print(data.shape)
print(data.isna().sum())

(603, 1)
0
item    0
dtype: int64


In [53]:
# create date column 

def date_column(date_str):
    if date_str.replace('/', '').isdigit() == True:
        return date_str
    
data['Date'] = data['item'].apply(date_column)
data

Unnamed: 0,item,Date
0,2/11,2/11
1,68330 Feb rent,
2,770 charge,
3,68330 March rent,
4,770 charge,
...,...,...
598,"4000, social dinner, tres hermanos",
599,"600, social snacks, what the dickens",
600,"700, taxi, to Ebisu",
601,7/27,7/27


In [54]:
# function to fill in the dates for the Nones. forward fill!
data['Date'] = data['Date'].ffill()
data.head(20)

Unnamed: 0,item,Date
0,2/11,2/11
1,68330 Feb rent,2/11
2,770 charge,2/11
3,68330 March rent,2/11
4,770 charge,2/11
5,880 lunch torisho,2/11
6,3580 plants rack amazon,2/11
7,7980 air purifier amazon,2/11
8,2/12,2/12
9,750 McDonald’s,2/12


In [55]:
# how many rows have just the date and no items:

(data['item'] == data['Date']).sum()

167

In [56]:
# clear out the date-only rows
# boolean mask
data = data[data['item'] != data['Date']]
data.shape

(436, 2)

In [57]:
# convert the dates to datetime objects

from datetime import datetime

def add_year(date_str):
    return datetime.strptime(f'{date_str}/2024', '%m/%d/%Y')

data['Date'] = data['Date'].apply(add_year)
data

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['Date'] = data['Date'].apply(add_year)


Unnamed: 0,item,Date
1,68330 Feb rent,2024-02-11
2,770 charge,2024-02-11
3,68330 March rent,2024-02-11
4,770 charge,2024-02-11
5,880 lunch torisho,2024-02-11
...,...,...
596,"2595, groceries, gyomu super",2024-07-25
598,"4000, social dinner, tres hermanos",2024-07-26
599,"600, social snacks, what the dickens",2024-07-26
600,"700, taxi, to Ebisu",2024-07-26


### next: parse out amount, item, location

In [58]:
def split_item(item):
    if ',' in item:
        return item.split(',')
    else: 
        return item.split()

data['item'] = data['item'].apply(split_item)    
data


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['item'] = data['item'].apply(split_item)


Unnamed: 0,item,Date
1,"[68330, Feb, rent]",2024-02-11
2,"[770, charge]",2024-02-11
3,"[68330, March, rent]",2024-02-11
4,"[770, charge]",2024-02-11
5,"[880, lunch, torisho]",2024-02-11
...,...,...
596,"[2595, groceries, gyomu super]",2024-07-25
598,"[4000, social dinner, tres hermanos]",2024-07-26
599,"[600, social snacks, what the dickens]",2024-07-26
600,"[700, taxi, to Ebisu]",2024-07-26


In [61]:
def parse_items(row):
    price, item, location, details = None, None, None, None
    if len(row) > 0:
        price = row[0]
    if len(row) > 1:
        item = row[1]
    if len(row) > 2:
        location = row[2]
    if len(row) > 3:
        details = row[3]
    return pd.Series([price, item, location, details])

# Apply the function to the 'items' column and create new columns
data[['Price', 'Item', 'Location', 'Details']] = data['item'].apply(parse_items)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[['Price', 'Item', 'Location', 'Details']] = data['item'].apply(parse_items)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[['Price', 'Item', 'Location', 'Details']] = data['item'].apply(parse_items)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[['Price', 'Item', 'Location', 'Details

In [62]:
data

Unnamed: 0,item,Date,Price,Item,Location,Details
1,"[68330, Feb, rent]",2024-02-11,68330,Feb,rent,
2,"[770, charge]",2024-02-11,770,charge,,
3,"[68330, March, rent]",2024-02-11,68330,March,rent,
4,"[770, charge]",2024-02-11,770,charge,,
5,"[880, lunch, torisho]",2024-02-11,880,lunch,torisho,
...,...,...,...,...,...,...
596,"[2595, groceries, gyomu super]",2024-07-25,2595,groceries,gyomu super,
598,"[4000, social dinner, tres hermanos]",2024-07-26,4000,social dinner,tres hermanos,
599,"[600, social snacks, what the dickens]",2024-07-26,600,social snacks,what the dickens,
600,"[700, taxi, to Ebisu]",2024-07-26,700,taxi,to Ebisu,


In [63]:
data.to_csv('Spending 2.csv')

## buncha experimentation

In [222]:
# data['Date']

def find_date(column):
    for i in range(len(column)):
        if column[i].replace('/', '').isdigit() == True:
            date = column[i]

In [223]:
def insert_date(column, ):
    while row.replace('/', '').isdigit() == False:
        

SyntaxError: incomplete input (4087099836.py, line 3)

In [None]:
'2/12'.isdigit()



In [None]:
'2/12'.replace('/', '')