# This notebook applies PCA to linguistic feature selection

In [None]:
import pickle
import os
import tqdm
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.api import OLS
import statsmodels.api as sm
import statsmodels.stats as sts
from scipy import stats
import statsmodels.stats.api as sms


import datetime
date = datetime.datetime.now()
date = date.strftime("%Y.%m.%d")
np.random.seed(0)

def stdz(series: pd.Series):
    """Standardize the given pandas Series"""
    return (series - series.mean())/series.std()
def unitstdz(series:pd.Series):
    return (series - series.min())/(series.max()-series.min())

from imblearn.under_sampling import RandomUnderSampler

import re
def extract_video_number(filename):
    match = re.match(r'(\d+)[+-]', filename)
    return match.group(1) if match else None

# read data

In [36]:
rating = pd.read_excel('eyetracking-coordinates-imname.xlsx', sheet_name='video-based')

In [37]:
rating.columns

Index(['VideoNumber', 'GroupN(1=low(1-3),2=neutral(4-6),3=high(7-10))',
       'PrefereneMean', 'PurchaseDesireMean', 'NarrativeInterityMean',
       'EmpathyMean', 'TotalTime(S)', 'ShotNumber', 'ShotMean',
       'Shotvariance'],
      dtype='object')

In [38]:
rating.drop('Shotvariance',axis=1,inplace=True)

In [39]:
rating.head()

Unnamed: 0,VideoNumber,"GroupN(1=low(1-3),2=neutral(4-6),3=high(7-10))",PrefereneMean,PurchaseDesireMean,NarrativeInterityMean,EmpathyMean,TotalTime(S),ShotNumber,ShotMean
0,1,2,5.957447,5.553191,5.085106,5.255319,33,8,4.125
1,2,2,5.0,4.0,5.0,6.333333,84,9,9.333333
2,3,1,3.0,1.777778,5.555556,2.777778,31,10,3.1
3,4,3,7.111111,6.777778,4.666667,6.777778,34,17,2.0
4,5,1,3.666667,2.333333,5.833333,2.833333,32,12,2.666667


In [40]:
text_path = [os.path.join('../NLP',i) for i in os.listdir('../NLP/')]

In [41]:
text_path

['../NLP/LIWC-22 Results - transcript_plain - LIWC Analysis.csv',
 '../NLP/LIWC-22 Results - transcript_plain_abosolutistwords.csv',
 '../NLP/LIWC-22 Results - transcript_plain_agitationdejection.csv',
 '../NLP/LIWC-22 Results - transcript_plain_behavioralactivation.csv',
 '../NLP/LIWC-22 Results - transcript_plain_brand.csv',
 '../NLP/LIWC-22 Results - transcript_plain_controversial_terms.csv',
 '../NLP/LIWC-22 Results - transcript_plain_costbenefit.csv',
 '../NLP/LIWC-22 Results - transcript_plain_creativity.csv',
 '../NLP/LIWC-22 Results - transcript_plain_imagination_lexicon.csv',
 '../NLP/LIWC-22 Results - transcript_plain_mindperception.csv',
 '../NLP/LIWC-22 Results - transcript_plain_security.csv',
 '../NLP/all_results.csv']

In [42]:
def split_ad_id(ad_id):
    if '+' in ad_id[0:5]:
        return ad_id.split('+')[0]
    elif '-' in ad_id[0:5]:
        return ad_id.split('-')[0]
    else:
        return ad_id  # if neither '+' nor '-' is present, return the original ad_id

In [43]:
def lowercase_columns(df):
    df.columns = df.columns.str.lower()
    return df

# Read the first file and convert column names to lowercase
features = pd.read_csv(text_path[0])
features = lowercase_columns(features)

# Read and merge each file into the features dataframe
for path in tqdm.tqdm(text_path[1:-1]):
    df = pd.read_csv(path)
    df = lowercase_columns(df)
    
    common_cols = features.columns.intersection(df.columns).tolist()
    additional_cols = df.columns.difference(features.columns).tolist()

    features = pd.merge(features, df[['filename'] + additional_cols], on='filename', how='inner')

features['VideoNumber'] = features['filename'].apply(split_ad_id)

100%|██████████| 10/10 [00:00<00:00, 105.86it/s]


In [44]:
features.columns

Index(['filename', 'segment', 'wc', 'analytic', 'clout', 'authentic', 'tone',
       'wps', 'bigwords', 'dic',
       ...
       'spiritual_imagery', 'total', 'vision', 'agency',
       'agentrelatedemotions', 'experience', 'mindoverall',
       'patientrelatedemotions', 'security', 'VideoNumber'],
      dtype='object', length=159)

In [45]:
features.tail()

Unnamed: 0,filename,segment,wc,analytic,clout,authentic,tone,wps,bigwords,dic,...,spiritual_imagery,total,vision,agency,agentrelatedemotions,experience,mindoverall,patientrelatedemotions,security,VideoNumber
106,95+The Promise of Similac (1).txt,1,1116,73.81,98.55,8.3,99.0,7.15,23.3,86.02,...,0,1.16,0,2.33,0.0,1.16,3.49,1.16,0.0,95
107,96+The Promise of Similac.txt,1,100,93.09,86.52,1.0,67.48,16.67,27.0,76.0,...,0,3.0,0,3.0,0.0,0.0,3.0,0.0,0.0,96
108,97+The Wonder of Baby SMA® PRO Follow on Milk...,1,688,98.31,98.38,25.75,95.15,11.47,23.26,84.01,...,0,8.72,0,0.0,1.45,2.91,2.91,0.0,1.45,97
109,98+Unibale Baby formula- Babyactiv8.txt,1,288,99.0,99.0,4.36,99.0,28.8,26.04,89.58,...,0,3.47,0,0.0,0.0,1.74,1.74,0.0,0.0,98
110,99+VINTAGE 1958 PET EVAPORATED MILK COMMERCIAL...,1,1546,74.77,78.84,1.0,56.58,12.88,17.85,86.8,...,0,0.78,0,0.0,0.0,0.0,0.0,0.0,0.78,99


In [46]:
features['VideoNumber'] = features['VideoNumber'].astype(np.int64)

In [48]:
data = features.merge(rating, on='VideoNumber', how='inner')

In [49]:
data['ad_id'] = data['filename'].apply(lambda x: x.replace('.txt', '.mp4'))

In [53]:
for col in data.columns:
    if data[col].isna().any():
        print(col)

analytic
clout
authentic
tone


In [54]:
data['tone'] = data['tone'].fillna(data['tone'].mean())
data['authentic'] = data['authentic'].fillna(data['authentic'].mean())
data['clout'] = data['clout'].fillna(data['clout'].mean())
data['analytic'] = data['analytic'].fillna(data['analytic'].mean())

In [55]:
data = data.dropna(how='any')
data = data.replace([np.inf, -np.inf], np.nan).dropna()

In [58]:
features = ['wc', 'analytic', 'clout', 'authentic', 'tone', 'wps', 'bigwords', 'dic', 'linguistic', 'function', 'pronoun', 
            'ppron', 'i', 'we', 'you', 'shehe', 'they', 'ipron', 'det', 'article', 'number', 'prep', 'auxverb', 'adverb', 
            'conj', 'negate', 'verb', 'adj', 'quantity', 'drives', 'affiliation', 'achieve', 'power', 'cognition', 
            'allnone', 'cogproc', 'insight', 'cause', 'discrep', 'tentat', 'certitude', 'differ', 'memory', 'affect',
            'tone_pos', 'tone_neg', 'emotion', 'emo_pos', 'emo_neg', 'emo_anx', 'emo_anger', 'emo_sad', 'swear', 'social',
            'socbehav', 'prosocial', 'polite', 'conflict', 'moral', 'comm', 'socrefs', 'family', 'friend', 'female', 'male',
            'culture', 'politic', 'ethnicity', 'tech', 'lifestyle', 'leisure', 'home', 'work', 'money', 'relig', 'physical',
            'health', 'illness', 'wellness', 'mental', 'substances', 'sexual', 'food', 'death', 'need', 'want', 'acquire', 
            'lack', 'fulfill', 'fatigue', 'reward', 'risk', 'curiosity', 'allure', 'perception', 'attention', 'motion', 
            'space', 'visual', 'auditory', 'feeling', 'time', 'focuspast', 'focuspresent', 'focusfuture', 'conversation', 
            'netspeak', 'assent', 'nonflu', 'filler', 'allpunc', 'period', 'comma', 'qmark', 'exclam', 'apostro', 'otherp', 
            'emoji', 'absolutist', 'agitation', 'dejection', 'accomplishment', 'behavioral_activation', 'breadth', 
            'decisions', 'effort_enjoyment', 'longterm', 'satisfaction', 'structure', 'competence', 'excitement', 
            'not_relevant', 'ruggedness', 'sincerity', 'sophistication', 'highcontroversial', 'lowcontroversial',
            'mediumcontroversial', 'benefit', 'cost', 'creativity_innovation', 'alterations', 'cogs_precogs', 
            'enchantment_emotions', 'fabulations', 'infinity_eternity', 'renewal', 'spiritual_imagery', 'total', 
            'vision', 'agency', 'agentrelatedemotions', 'experience', 'mindoverall', 'patientrelatedemotions', 'security']

In [59]:
len(features)

156

# dispersion

In [60]:
data.head()

Unnamed: 0,filename,segment,wc,analytic,clout,authentic,tone,wps,bigwords,dic,...,VideoNumber,"GroupN(1=low(1-3),2=neutral(4-6),3=high(7-10))",PrefereneMean,PurchaseDesireMean,NarrativeInterityMean,EmpathyMean,TotalTime(S),ShotNumber,ShotMean,ad_id
0,1+90 Years Crafting.txt,1,610,55.44,40.06,18.98,66.73,13.56,25.08,80.82,...,1,2,5.957447,5.553191,5.085106,5.255319,33,8,4.125,1+90 Years Crafting.mp4
1,10+Aptamil Advert 2021.txt,1,628,85.05,99.0,24.57,65.41,13.96,20.06,85.67,...,10,2,4.166667,4.0,5.666667,3.333333,31,14,2.214286,10+Aptamil Advert 2021.mp4
2,100+Vintage Carnation infant formula commercia...,1,3358,67.34,85.71,5.65,99.0,8.0,19.39,84.37,...,100,1,3.333333,3.0,6.666667,2.333333,69,26,2.653846,100+Vintage Carnation infant formula commercia...
3,101+Welcome To Kendamil.txt,1,1076,93.15,83.65,19.04,96.36,19.21,25.56,80.58,...,101,2,4.176471,3.588235,5.166667,3.388889,61,14,4.357143,101+Welcome To Kendamil.mp4
4,102+Welcome to the Kendamil family!.txt,1,1131,89.52,88.74,26.07,89.47,16.88,26.26,78.6,...,102,2,4.285714,3.857143,5.0,2.857143,61,19,3.210526,102+Welcome to the Kendamil family!.mp4


In [62]:
dispersion1 = pd.read_json('dispersion_measure0629.json', lines=True)
dispersion1.head(2)

Unnamed: 0,ad_id,id,std_x,std_y,combined_std,convex_hull_area,convex_hull_area_shapely,mean_euclidean_distance
0,1+90 Years Crafting.mp4,R_2qFdJq0jnOo6gpt,0.049059,0.132776,0.14155,0.067034,0.067034,0.11414
1,1+90 Years Crafting.mp4,R_5iNTkLqxP6ZbD7H,0.071655,0.104923,0.127056,0.06368,0.06368,0.112329


In [63]:
dispersion1['VideoNumber'] = dispersion1['ad_id'].apply(extract_video_number)
data['VideoNumber'] = data['VideoNumber'].astype(int)
dispersion1['VideoNumber'] = dispersion1['VideoNumber'].astype(int)
data = data.merge(dispersion1[['id', 'std_x', 'std_y', 'combined_std', 'convex_hull_area',
       'convex_hull_area_shapely', 'mean_euclidean_distance', 'VideoNumber']], on='VideoNumber')

In [65]:
data.head(2)

Unnamed: 0,filename,segment,wc,analytic,clout,authentic,tone,wps,bigwords,dic,...,ShotNumber,ShotMean,ad_id,id,std_x,std_y,combined_std,convex_hull_area,convex_hull_area_shapely,mean_euclidean_distance
0,1+90 Years Crafting.txt,1,610,55.44,40.06,18.98,66.73,13.56,25.08,80.82,...,8,4.125,1+90 Years Crafting.mp4,R_2qFdJq0jnOo6gpt,0.049059,0.132776,0.14155,0.067034,0.067034,0.11414
1,1+90 Years Crafting.txt,1,610,55.44,40.06,18.98,66.73,13.56,25.08,80.82,...,8,4.125,1+90 Years Crafting.mp4,R_5iNTkLqxP6ZbD7H,0.071655,0.104923,0.127056,0.06368,0.06368,0.112329


In [66]:
len(data)

746

In [67]:
data = data.dropna()

In [68]:
len(data)

746

In [69]:
data.columns

Index(['filename', 'segment', 'wc', 'analytic', 'clout', 'authentic', 'tone',
       'wps', 'bigwords', 'dic',
       ...
       'ShotNumber', 'ShotMean', 'ad_id', 'id', 'std_x', 'std_y',
       'combined_std', 'convex_hull_area', 'convex_hull_area_shapely',
       'mean_euclidean_distance'],
      dtype='object', length=175)

# PCA

In [70]:
features = ['wc', 'analytic', 'clout', 'authentic', 'tone', 'wps', 'bigwords', 'dic', 'linguistic', 'function', 'pronoun', 
            'ppron', 'i', 'we', 'you', 'shehe', 'they', 'ipron', 'det', 'article', 'number', 'prep', 'auxverb', 'adverb', 
            'conj', 'negate', 'verb', 'adj', 'quantity', 'drives', 'affiliation', 'achieve', 'power', 'cognition', 
            'allnone', 'cogproc', 'insight', 'cause', 'discrep', 'tentat', 'certitude', 'differ', 'memory', 'affect',
            'tone_pos', 'tone_neg', 'emotion', 'emo_pos', 'emo_neg', 'emo_anx', 'emo_anger', 'emo_sad', 'swear', 'social',
            'socbehav', 'prosocial', 'polite', 'conflict', 'moral', 'comm', 'socrefs', 'family', 'friend', 'female', 'male',
            'culture', 'politic', 'ethnicity', 'tech', 'lifestyle', 'leisure', 'home', 'work', 'money', 'relig', 'physical',
            'health', 'illness', 'wellness', 'mental', 'substances', 'sexual', 'food', 'death', 'need', 'want', 'acquire', 
            'lack', 'fulfill', 'fatigue', 'reward', 'risk', 'curiosity', 'allure', 'perception', 'attention', 'motion', 
            'space', 'visual', 'auditory', 'feeling', 'time', 'focuspast', 'focuspresent', 'focusfuture', 'conversation', 
            'netspeak', 'assent', 'nonflu', 'filler', 'allpunc', 'period', 'comma', 'qmark', 'exclam', 'apostro', 'otherp', 
            'emoji', 'absolutist', 'agitation', 'dejection', 'accomplishment', 'behavioral_activation', 'breadth', 
            'decisions', 'effort_enjoyment', 'longterm', 'satisfaction', 'structure', 'competence', 'excitement', 
            'not_relevant', 'ruggedness', 'sincerity', 'sophistication', 'highcontroversial', 'lowcontroversial',
            'mediumcontroversial', 'benefit', 'cost', 'creativity_innovation', 'alterations', 'cogs_precogs', 
            'enchantment_emotions', 'fabulations', 'infinity_eternity', 'renewal', 'spiritual_imagery', 'total', 
            'vision', 'agency', 'agentrelatedemotions', 'experience', 'mindoverall', 'patientrelatedemotions', 'security']

In [71]:
from sklearn.linear_model import LassoCV
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

X = data[features]

# Standardize your features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [72]:
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import GridSearchCV


In [86]:
# Apply PCA
pca = PCA()
X_pca = pca.fit_transform(X_scaled)

# Get explained variance ratios
explained_var_ratio = pca.explained_variance_ratio_

# Determine number of components to keep
total_var = 0
n_components = 0
for i, ratio in enumerate(explained_var_ratio):
    total_var += ratio
    if total_var >= 0.975:  # Adjust the threshold as needed
        n_components = i + 1
        break

print(f"Number of principal components selected: {n_components}")

# Apply PCA transformation with selected components
pca_final = PCA(n_components=n_components)
X_selected = pca_final.fit_transform(X_scaled)

# Get selected features (if needed, you can interpret the components to understand which original features contribute the most)
selected_features = X.columns[pca_final.components_.mean(axis=0).argsort()[::-1][:n_components]]
print("Selected features by PCA:", selected_features)

Number of principal components selected: 61
Selected features by PCA: Index(['sexual', 'allure', 'lack', 'feeling', 'risk', 'agency', 'dic', 'adj',
       'tone', 'period', 'fabulations', 'linguistic', 'assent', 'wc', 'relig',
       'conj', 'effort_enjoyment', 'politic', 'conversation', 'otherp',
       'insight', 'allpunc', 'focusfuture', 'authentic', 'nonflu', 'security',
       'number', 'sophistication', 'illness', 'excitement', 'exclam',
       'mindoverall', 'swear', 'infinity_eternity', 'reward', 'home',
       'netspeak', 'memory', 'breadth', 'auditory', 'focuspast', 'alterations',
       'leisure', 'they', 'cogs_precogs', 'behavioral_activation', 'auxverb',
       'absolutist', 'verb', 'allnone', 'differ', 'cognition', 'tentat',
       'sincerity', 'lifestyle', 'ruggedness', 'time', 'you', 'ppron',
       'acquire', 'agentrelatedemotions'],
      dtype='object')


In [87]:
sorted_values = np.sort(selected_features.values)
sorted_values

array(['absolutist', 'acquire', 'adj', 'agency', 'agentrelatedemotions',
       'allnone', 'allpunc', 'allure', 'alterations', 'assent',
       'auditory', 'authentic', 'auxverb', 'behavioral_activation',
       'breadth', 'cognition', 'cogs_precogs', 'conj', 'conversation',
       'dic', 'differ', 'effort_enjoyment', 'excitement', 'exclam',
       'fabulations', 'feeling', 'focusfuture', 'focuspast', 'home',
       'illness', 'infinity_eternity', 'insight', 'lack', 'leisure',
       'lifestyle', 'linguistic', 'memory', 'mindoverall', 'netspeak',
       'nonflu', 'number', 'otherp', 'period', 'politic', 'ppron',
       'relig', 'reward', 'risk', 'ruggedness', 'security', 'sexual',
       'sincerity', 'sophistication', 'swear', 'tentat', 'they', 'time',
       'tone', 'verb', 'wc', 'you'], dtype=object)

In [88]:
correlation_matrix = data[list(sorted_values)].corr()
# Define a threshold for high correlation
threshold = 0.9

# Find the index pairs of highly correlated variables
high_corr_var_pairs = [
    (correlation_matrix.columns[i], correlation_matrix.columns[j])
    for i in range(len(correlation_matrix.columns))
    for j in range(i + 1, len(correlation_matrix.columns))
    if correlation_matrix.iloc[i, j] > threshold
]

# Display the highly correlated variable pairs
print("Highly correlated variable pairs:")
for pair in high_corr_var_pairs:
    print(pair)

Highly correlated variable pairs:
('assent', 'politic')
('auditory', 'breadth')
('auditory', 'leisure')
('auditory', 'lifestyle')
('breadth', 'leisure')
('breadth', 'lifestyle')
('conversation', 'nonflu')
('leisure', 'lifestyle')
('netspeak', 'politic')


In [89]:
remove_colinearity_features = ['assent','auditory','breadth','conversation','leisure','netspeak','ppron']
sorted_values = [i for i in sorted_values if i not in remove_colinearity_features]

In [90]:
print(sorted_values)

['absolutist', 'acquire', 'adj', 'agency', 'agentrelatedemotions', 'allnone', 'allpunc', 'allure', 'alterations', 'authentic', 'auxverb', 'behavioral_activation', 'cognition', 'cogs_precogs', 'conj', 'dic', 'differ', 'effort_enjoyment', 'excitement', 'exclam', 'fabulations', 'feeling', 'focusfuture', 'focuspast', 'home', 'illness', 'infinity_eternity', 'insight', 'lack', 'lifestyle', 'linguistic', 'memory', 'mindoverall', 'nonflu', 'number', 'otherp', 'period', 'politic', 'relig', 'reward', 'risk', 'ruggedness', 'security', 'sexual', 'sincerity', 'sophistication', 'swear', 'tentat', 'they', 'time', 'tone', 'verb', 'wc', 'you']


In [91]:
len(sorted_values)

54

In [96]:
data['preference'] = data['GroupN(1=low(1-3),2=neutral(4-6),3=high(7-10))']

In [97]:
conditions = [
    (0 < data['PurchaseDesireMean']) & (data['PurchaseDesireMean'] <= 3),
    (3 < data['PurchaseDesireMean']) & (data['PurchaseDesireMean'] <= 6),
    (6 < data['PurchaseDesireMean']) & (data['PurchaseDesireMean'] <= 10)
]

choices = [1, 2, 3]

data['purchase'] = np.select(conditions, choices)

In [98]:
data.columns

Index(['filename', 'segment', 'wc', 'analytic', 'clout', 'authentic', 'tone',
       'wps', 'bigwords', 'dic',
       ...
       'id', 'std_x', 'std_y', 'combined_std', 'convex_hull_area',
       'convex_hull_area_shapely', 'mean_euclidean_distance', 'intercept',
       'preference', 'purchase'],
      dtype='object', length=178)

In [99]:
data.head()

Unnamed: 0,filename,segment,wc,analytic,clout,authentic,tone,wps,bigwords,dic,...,id,std_x,std_y,combined_std,convex_hull_area,convex_hull_area_shapely,mean_euclidean_distance,intercept,preference,purchase
0,1+90 Years Crafting.txt,1,610,55.44,40.06,18.98,66.73,13.56,25.08,80.82,...,R_2qFdJq0jnOo6gpt,0.049059,0.132776,0.14155,0.067034,0.067034,0.11414,1,2,2
1,1+90 Years Crafting.txt,1,610,55.44,40.06,18.98,66.73,13.56,25.08,80.82,...,R_5iNTkLqxP6ZbD7H,0.071655,0.104923,0.127056,0.06368,0.06368,0.112329,1,2,2
2,1+90 Years Crafting.txt,1,610,55.44,40.06,18.98,66.73,13.56,25.08,80.82,...,R_x5FWpiRdYvqrumZ,0.069265,0.068579,0.097471,0.040371,0.040371,0.084495,1,2,2
3,1+90 Years Crafting.txt,1,610,55.44,40.06,18.98,66.73,13.56,25.08,80.82,...,R_2WDsrmAlPtoO0HZ,0.085395,0.055002,0.101575,0.0516,0.0516,0.083171,1,2,2
4,1+90 Years Crafting.txt,1,610,55.44,40.06,18.98,66.73,13.56,25.08,80.82,...,R_2DTAqyIUDqXnKfP,0.06929,0.08772,0.111785,0.053018,0.053018,0.097591,1,2,2


In [100]:
data.tail()

Unnamed: 0,filename,segment,wc,analytic,clout,authentic,tone,wps,bigwords,dic,...,id,std_x,std_y,combined_std,convex_hull_area,convex_hull_area_shapely,mean_euclidean_distance,intercept,preference,purchase
741,99+VINTAGE 1958 PET EVAPORATED MILK COMMERCIAL...,1,1546,74.77,78.84,1.0,56.58,12.88,17.85,86.8,...,R_pcjQCK9dAmqufrX,0.061889,0.095416,0.11373,0.034914,0.034914,0.104958,1,2,2
742,99+VINTAGE 1958 PET EVAPORATED MILK COMMERCIAL...,1,1546,74.77,78.84,1.0,56.58,12.88,17.85,86.8,...,R_3QVndydHS3ggukh,0.072965,0.084651,0.111757,0.060095,0.060095,0.101739,1,2,2
743,99+VINTAGE 1958 PET EVAPORATED MILK COMMERCIAL...,1,1546,74.77,78.84,1.0,56.58,12.88,17.85,86.8,...,R_3n75JGLz1DIwqsc,0.072944,0.09115,0.116744,0.066781,0.066781,0.103575,1,2,2
744,99+VINTAGE 1958 PET EVAPORATED MILK COMMERCIAL...,1,1546,74.77,78.84,1.0,56.58,12.88,17.85,86.8,...,R_W3A7gkKqFyYxn5D,0.042931,0.06393,0.077007,0.020444,0.020444,0.069639,1,2,2
745,99+VINTAGE 1958 PET EVAPORATED MILK COMMERCIAL...,1,1546,74.77,78.84,1.0,56.58,12.88,17.85,86.8,...,R_2Xps7uqZpYygTm6,0.067836,0.06846,0.096376,0.040415,0.040415,0.083729,1,2,2
