# FIRST CLEANING OF CORN DATA
## cleaned in excel, now to Python

In [18]:
import pandas as pd
import numpy as np
import os
import seaborn as sns
import matplotlib.pyplot as plt

In [19]:
path = r"C:\Users\Noah\Achievement 6"

In [20]:
# importing corn data
df_corn = pd.read_csv(os.path.join(path, '02 Data', 'Cleaned', 'Corn Cleaned.csv'), index_col=False)

In [21]:
df_corn.shape

(5298, 16)

## consistency checks

In [22]:
df_corn.describe()


Unnamed: 0,Year,Zip code,latitude,longitude,Sales ($),State ANSI,Ag District Code
count,5298.0,5298.0,5169.0,5169.0,5298.0,5298.0,5298.0
mean,2017.564364,50493.010004,38.376429,-90.91574,29242220.0,30.486976,48.299358
std,4.968516,21550.142365,4.652816,10.183299,51463350.0,14.507038,25.511498
min,2012.0,501.0,24.82,-124.15,0.0,1.0,10.0
25%,2012.0,32601.0,35.03,-96.85,364750.0,19.0,30.0
50%,2022.0,50046.5,38.46,-89.46,5685500.0,29.0,50.0
75%,2022.0,67438.5,41.66,-83.57,32543000.0,44.0,70.0
max,2022.0,99328.0,48.99,-67.76,393787000.0,56.0,97.0


### checking for mixed type colummns

In [23]:
for col in df_corn.columns:
    if df_corn[col].map(type).nunique() > 1:
        print(col)
        # no output means no mixed type

### column types

In [24]:
df_corn.dtypes

Year                                  int64
County                               object
State                                object
Zip code                              int64
latitude                            float64
longitude                           float64
Plant Hardiness Zone                 object
Corn Acres Harvested                 object
Sales ($)                             int64
Coefficient of Variation (%)         object
State ANSI                            int64
Ag District                          object
Ag District Code                      int64
Commodity                            object
Data Item                            object
Concatenated County, State, Year     object
dtype: object

## changing Zip to string type

In [25]:
df_corn['Zip code'] = df_corn['Zip code'].astype('str')

## renaming Coefficient column - too wordy

In [26]:
df_corn.rename(columns={'Coefficient of Variation (%)': 'CV'}, inplace=True)
print(df_corn)

      Year    County          State Zip code  latitude  longitude  \
0     2022     CREEK       OKLAHOMA    74010     35.83     -96.39   
1     2022     GRADY       OKLAHOMA    73002     34.92     -97.77   
2     2022   MCCLAIN       OKLAHOMA    73010     35.14     -97.65   
3     2022     PAYNE       OKLAHOMA    74023     35.97     -96.76   
4     2022     MAJOR       OKLAHOMA    73718     36.24     -98.18   
...    ...       ...            ...      ...       ...        ...   
5293  2022   KANAWHA  WEST VIRGINIA    25015     38.23     -81.53   
5294  2022   LINCOLN  WEST VIRGINIA    25003     38.28     -81.84   
5295  2022   ASHLAND      WISCONSIN    54514     46.01     -90.49   
5296  2022   NATRONA        WYOMING    82601     42.83    -106.32   
5297  2022  NIOBRARA        WYOMING    82222     43.20    -104.65   

     Plant Hardiness Zone Corn Acres Harvested  Sales ($)   CV  State ANSI  \
0                      7b                  244      71000  0.3          40   
1              

In [27]:
df_corn.describe()

Unnamed: 0,Year,latitude,longitude,Sales ($),State ANSI,Ag District Code
count,5298.0,5169.0,5169.0,5298.0,5298.0,5298.0
mean,2017.564364,38.376429,-90.91574,29242220.0,30.486976,48.299358
std,4.968516,4.652816,10.183299,51463350.0,14.507038,25.511498
min,2012.0,24.82,-124.15,0.0,1.0,10.0
25%,2012.0,35.03,-96.85,364750.0,19.0,30.0
50%,2022.0,38.46,-89.46,5685500.0,29.0,50.0
75%,2022.0,41.66,-83.57,32543000.0,44.0,70.0
max,2022.0,48.99,-67.76,393787000.0,56.0,97.0


### checking for null and duplicates

In [28]:
df_corn.isnull().sum()

Year                                  0
County                                0
State                                 0
Zip code                              0
latitude                            129
longitude                           129
Plant Hardiness Zone                  0
Corn Acres Harvested                  0
Sales ($)                             0
CV                                    0
State ANSI                            0
Ag District                           0
Ag District Code                      0
Commodity                             0
Data Item                             0
Concatenated County, State, Year      0
dtype: int64

In [29]:
df_dups = df_corn[df_corn.duplicated()]

In [30]:
df_dups.shape

(109, 16)

### address null first

### CV and Corn Acres Harvested not integer values- need to change 

In [31]:
# first, must remove null values (D)
df_corn['Corn Acres Harvested'] = df_corn['Corn Acres Harvested'].replace(' (D)', np.nan)

In [32]:
null_counts = df_corn['Corn Acres Harvested'].isnull().sum()
print(null_counts)

625


In [33]:
625/5169

0.12091313600309538

### missing values greater than 5% of total data. Need to find workaround

### replace null value with average for corresponding Ag District

In [34]:
# converting column to numeric for aggregations
df_corn['Corn Acres Harvested'] = pd.to_numeric(df_corn['Corn Acres Harvested'], errors='coerce')
print(df_corn.dtypes)

Year                                  int64
County                               object
State                                object
Zip code                             object
latitude                            float64
longitude                           float64
Plant Hardiness Zone                 object
Corn Acres Harvested                float64
Sales ($)                             int64
CV                                   object
State ANSI                            int64
Ag District                          object
Ag District Code                      int64
Commodity                            object
Data Item                            object
Concatenated County, State, Year     object
dtype: object


In [35]:
# Calculate the mean value for each district, ignoring NaNs
mean_values = df_corn.groupby('Ag District')['Corn Acres Harvested'].transform('mean')

In [36]:
#filling in (D) values with averages of the Ag Region
# Replace NaN values in 'Corn Acres Harvested' with the mean value for their district
df_corn['Corn Acres Harvested'] = df_corn['Corn Acres Harvested'].fillna(mean_values)

In [37]:
null_counts = df_corn['Corn Acres Harvested'].isnull().sum()
print(null_counts)

4


In [38]:
null_rows = df_corn[df_corn['Corn Acres Harvested'].isna()]
print(null_rows)


      Year      County     State Zip code  latitude  longitude  \
4608  2022     JACKSON  COLORADO    80430     40.52    -106.42   
4609  2022        LAKE  COLORADO    80429     39.37    -106.18   
4610  2022      MOFFAT  COLORADO    81610     40.24    -109.00   
4611  2022  RIO GRANDE  COLORADO    81132     37.67    -106.35   

     Plant Hardiness Zone  Corn Acres Harvested  Sales ($)   CV  State ANSI  \
4608                   4b                   NaN          0  (D)           8   
4609                   5a                   NaN          0  (D)           8   
4610                   6a                   NaN          0  (D)           8   
4611                   5b                   NaN          0  (D)           8   

                 Ag District  Ag District Code Commodity  \
4608  NORTHWEST AND MOUNTAIN                10      CORN   
4609  NORTHWEST AND MOUNTAIN                10      CORN   
4610  NORTHWEST AND MOUNTAIN                10      CORN   
4611         SAN LUIS VALLEY     

### still 4 NaN rows. Small enough percentage of population to be ignored.

In [39]:
# Drop rows where 'Corn Acres Harvested' is NaN
df_corn_dropna = df_corn.dropna(subset=['Corn Acres Harvested'])

In [40]:
df_corn_dropna.shape

(5294, 16)

## cleaning CV

In [41]:
uniquevalues_CV = df_corn['CV'].unique()
print(uniquevalues_CV)

['0.3' '2.5' '2.8' '3.1' '3.9' '5.1' '5.4' '5.6' '6.2' '6.4' '6.5' '6.8'
 '7' '7.3' '7.4' '7.8' '8.1' '8.2' '8.3' '8.4' '8.5' '8.8' '9.1' '9.2'
 '9.7' '9.8' '9.9' '10.1' '11.4' '11.8' '12.1' '12.3' '12.5' '12.6' '13.1'
 '13.2' '13.4' '13.5' '13.7' '14.3' '14.4' '15.2' '15.4' '16.1' '16.8'
 '17' '17.4' '17.7' '17.8' '18.2' '18.6' '18.8' '19.1' '19.2' '19.6'
 '19.7' '20' '20.4' '21.7' '21.8' '22.3' '22.5' '22.8' '23' '24.6' '25.3'
 '28.2' '28.3' '29.7' '30.6' '31.5' '37.3' '40.6' '40.9' '42.8' '43.5'
 '45.5' '51.9' '55.7' '62.8' '64.7' '(D)' '(L)']


### replace (L) with 0, (D) with average by region

### converting (L) values into 0. 
#### L = Coefficient of variation or generalized coefficient of variation is less than 0.05 percent or the standard error is less than 0.05 percent of the mean. Therefore, we can zero these values

In [42]:
df_corn['CV'] = df_corn['CV'].replace('(L)', 0)
zero_count = (df_corn['CV'] == 0).sum()
print(zero_count)

91


## repeating process of removing (D) values, this time from Coefficient 


In [43]:
nullvalues_CV = df_corn['CV'].isna()
null_count=nullvalues_CV.sum()
print(null_count)

0


In [44]:
uniquevalues_CV = df_corn['CV'].unique()
print(uniquevalues_CV)

['0.3' '2.5' '2.8' '3.1' '3.9' '5.1' '5.4' '5.6' '6.2' '6.4' '6.5' '6.8'
 '7' '7.3' '7.4' '7.8' '8.1' '8.2' '8.3' '8.4' '8.5' '8.8' '9.1' '9.2'
 '9.7' '9.8' '9.9' '10.1' '11.4' '11.8' '12.1' '12.3' '12.5' '12.6' '13.1'
 '13.2' '13.4' '13.5' '13.7' '14.3' '14.4' '15.2' '15.4' '16.1' '16.8'
 '17' '17.4' '17.7' '17.8' '18.2' '18.6' '18.8' '19.1' '19.2' '19.6'
 '19.7' '20' '20.4' '21.7' '21.8' '22.3' '22.5' '22.8' '23' '24.6' '25.3'
 '28.2' '28.3' '29.7' '30.6' '31.5' '37.3' '40.6' '40.9' '42.8' '43.5'
 '45.5' '51.9' '55.7' '62.8' '64.7' '(D)' 0]


In [45]:
null_counts = df_corn['CV'].isnull().sum()
print(null_counts)

0


In [46]:
uniquevalues_CV = df_corn['CV'].unique()
print(uniquevalues_CV)

['0.3' '2.5' '2.8' '3.1' '3.9' '5.1' '5.4' '5.6' '6.2' '6.4' '6.5' '6.8'
 '7' '7.3' '7.4' '7.8' '8.1' '8.2' '8.3' '8.4' '8.5' '8.8' '9.1' '9.2'
 '9.7' '9.8' '9.9' '10.1' '11.4' '11.8' '12.1' '12.3' '12.5' '12.6' '13.1'
 '13.2' '13.4' '13.5' '13.7' '14.3' '14.4' '15.2' '15.4' '16.1' '16.8'
 '17' '17.4' '17.7' '17.8' '18.2' '18.6' '18.8' '19.1' '19.2' '19.6'
 '19.7' '20' '20.4' '21.7' '21.8' '22.3' '22.5' '22.8' '23' '24.6' '25.3'
 '28.2' '28.3' '29.7' '30.6' '31.5' '37.3' '40.6' '40.9' '42.8' '43.5'
 '45.5' '51.9' '55.7' '62.8' '64.7' '(D)' 0]


In [47]:
# converting column to numeric for aggregations
df_corn['CV'] = pd.to_numeric(df_corn['CV'], errors='coerce')
print(df_corn.dtypes)

Year                                  int64
County                               object
State                                object
Zip code                             object
latitude                            float64
longitude                           float64
Plant Hardiness Zone                 object
Corn Acres Harvested                float64
Sales ($)                             int64
CV                                  float64
State ANSI                            int64
Ag District                          object
Ag District Code                      int64
Commodity                            object
Data Item                            object
Concatenated County, State, Year     object
dtype: object


In [48]:
#calculating the mean ofeach district while ignoring nulls 
mean_values_coeff = df_corn.groupby('Ag District')['CV'].transform('mean')

In [49]:
#Replace nulls in CV with mean
df_corn['CV'] = df_corn['CV'].fillna(mean_values_coeff)

In [50]:
#checking...
null_counts_coeff = df_corn['CV'].isnull().sum()
print(f"Number of NaN values in 'CV' after replacement: {null_counts_coeff}")

Number of NaN values in 'CV' after replacement: 4


In [51]:
# Drop rows where 'CV' is null
df_corn_dropCV = df_corn.dropna(subset=['CV'])
print(df_corn_dropCV)

      Year    County          State Zip code  latitude  longitude  \
0     2022     CREEK       OKLAHOMA    74010     35.83     -96.39   
1     2022     GRADY       OKLAHOMA    73002     34.92     -97.77   
2     2022   MCCLAIN       OKLAHOMA    73010     35.14     -97.65   
3     2022     PAYNE       OKLAHOMA    74023     35.97     -96.76   
4     2022     MAJOR       OKLAHOMA    73718     36.24     -98.18   
...    ...       ...            ...      ...       ...        ...   
5293  2022   KANAWHA  WEST VIRGINIA    25015     38.23     -81.53   
5294  2022   LINCOLN  WEST VIRGINIA    25003     38.28     -81.84   
5295  2022   ASHLAND      WISCONSIN    54514     46.01     -90.49   
5296  2022   NATRONA        WYOMING    82601     42.83    -106.32   
5297  2022  NIOBRARA        WYOMING    82222     43.20    -104.65   

     Plant Hardiness Zone  Corn Acres Harvested  Sales ($)   CV  State ANSI  \
0                      7b                 244.0      71000  0.3          40   
1            

In [52]:
null_counts2 = df_corn.isnull().sum()
print(null_counts2)

Year                                  0
County                                0
State                                 0
Zip code                              0
latitude                            129
longitude                           129
Plant Hardiness Zone                  0
Corn Acres Harvested                  4
Sales ($)                             0
CV                                    4
State ANSI                            0
Ag District                           0
Ag District Code                      0
Commodity                             0
Data Item                             0
Concatenated County, State, Year      0
dtype: int64


## dropping null counts

In [53]:
df_corn_cleaned = df_corn.dropna()
print(df_corn_cleaned)

      Year    County          State Zip code  latitude  longitude  \
0     2022     CREEK       OKLAHOMA    74010     35.83     -96.39   
1     2022     GRADY       OKLAHOMA    73002     34.92     -97.77   
2     2022   MCCLAIN       OKLAHOMA    73010     35.14     -97.65   
3     2022     PAYNE       OKLAHOMA    74023     35.97     -96.76   
4     2022     MAJOR       OKLAHOMA    73718     36.24     -98.18   
...    ...       ...            ...      ...       ...        ...   
5293  2022   KANAWHA  WEST VIRGINIA    25015     38.23     -81.53   
5294  2022   LINCOLN  WEST VIRGINIA    25003     38.28     -81.84   
5295  2022   ASHLAND      WISCONSIN    54514     46.01     -90.49   
5296  2022   NATRONA        WYOMING    82601     42.83    -106.32   
5297  2022  NIOBRARA        WYOMING    82222     43.20    -104.65   

     Plant Hardiness Zone  Corn Acres Harvested  Sales ($)   CV  State ANSI  \
0                      7b                 244.0      71000  0.3          40   
1            

In [54]:
# checking math to see if dropped values match to null values
5165-5298

-133

In [55]:
129+4

133

## dropped values and null values match, no nulls remain

## dropping columns

In [56]:
#droppping 'Data Item' and 'Commodity' column from new data set
df_corn_clean_dropdata = df_corn_cleaned.drop(columns = ['Data Item'])

In [57]:
df_corn_dropdatacomm = df_corn_clean_dropdata.drop(columns = ['Commodity'])

In [58]:
df_corn_dropdatacomm.head(5)

Unnamed: 0,Year,County,State,Zip code,latitude,longitude,Plant Hardiness Zone,Corn Acres Harvested,Sales ($),CV,State ANSI,Ag District,Ag District Code,"Concatenated County, State, Year"
0,2022,CREEK,OKLAHOMA,74010,35.83,-96.39,7b,244.0,71000,0.3,40,CENTRAL,50,CREEKOKLAHOMA2022
1,2022,GRADY,OKLAHOMA,73002,34.92,-97.77,7b,3063.0,2834000,0.3,40,CENTRAL,50,GRADYOKLAHOMA2022
2,2022,MCCLAIN,OKLAHOMA,73010,35.14,-97.65,7b,3167.0,1941000,0.3,40,CENTRAL,50,MCCLAINOKLAHOMA2022
3,2022,PAYNE,OKLAHOMA,74023,35.97,-96.76,7b,401.0,237000,0.3,40,CENTRAL,50,PAYNEOKLAHOMA2022
4,2022,MAJOR,OKLAHOMA,73718,36.24,-98.18,7a,4449.0,7794000,0.3,40,NORTH CENTRAL,40,MAJOROKLAHOMA2022


In [59]:
df_corn_dropdatacomm.shape

(5165, 14)

In [60]:
# checking for nulls
null_counts = df_corn_dropdatacomm.isnull().sum()
print(null_counts)

Year                                0
County                              0
State                               0
Zip code                            0
latitude                            0
longitude                           0
Plant Hardiness Zone                0
Corn Acres Harvested                0
Sales ($)                           0
CV                                  0
State ANSI                          0
Ag District                         0
Ag District Code                    0
Concatenated County, State, Year    0
dtype: int64


#### no null values

In [61]:
# renaming Concatenated
df_corn_dropdatacomm.rename(columns = {'Concatenated County, State, Year' : 'Concatenated'}, inplace = True)

In [62]:
df_corn_dropdatacomm.head()

Unnamed: 0,Year,County,State,Zip code,latitude,longitude,Plant Hardiness Zone,Corn Acres Harvested,Sales ($),CV,State ANSI,Ag District,Ag District Code,Concatenated
0,2022,CREEK,OKLAHOMA,74010,35.83,-96.39,7b,244.0,71000,0.3,40,CENTRAL,50,CREEKOKLAHOMA2022
1,2022,GRADY,OKLAHOMA,73002,34.92,-97.77,7b,3063.0,2834000,0.3,40,CENTRAL,50,GRADYOKLAHOMA2022
2,2022,MCCLAIN,OKLAHOMA,73010,35.14,-97.65,7b,3167.0,1941000,0.3,40,CENTRAL,50,MCCLAINOKLAHOMA2022
3,2022,PAYNE,OKLAHOMA,74023,35.97,-96.76,7b,401.0,237000,0.3,40,CENTRAL,50,PAYNEOKLAHOMA2022
4,2022,MAJOR,OKLAHOMA,73718,36.24,-98.18,7a,4449.0,7794000,0.3,40,NORTH CENTRAL,40,MAJOROKLAHOMA2022


In [63]:
df_corn_dropdatacomm.describe()

Unnamed: 0,Year,latitude,longitude,Corn Acres Harvested,Sales ($),CV,State ANSI,Ag District Code
count,5165.0,5165.0,5165.0,5165.0,5165.0,5165.0,5165.0,5165.0
mean,2017.554695,38.375597,-90.903293,35702.855518,29884670.0,15.408589,30.448403,48.874153
std,4.969617,4.654419,10.177359,50209.115825,51927290.0,11.361837,14.550664,25.239704
min,2012.0,24.82,-124.15,3.0,0.0,0.0,1.0,10.0
25%,2012.0,35.03,-96.84,1922.0,429000.0,8.4,19.0,30.0
50%,2022.0,38.46,-89.44,13149.0,6112000.0,13.2,29.0,50.0
75%,2022.0,41.66,-83.57,49546.0,33876000.0,18.2,45.0,70.0
max,2022.0,48.99,-67.76,353558.0,393787000.0,64.7,56.0,97.0


## addressing duplicates


In [64]:
df_dups.shape

(109, 16)

In [65]:
print(df_dups)

      Year       County          State Zip code  latitude  longitude  \
4925  2022       GREENE        ALABAMA    35443     32.75     -88.02   
4930  2022      YAVAPAI        ARIZONA    85324     34.13    -112.13   
4931  2022     GREENLEE        ARIZONA    85533     33.27    -109.27   
4938  2022        CLARK       ARKANSAS    71721     33.89     -93.21   
4947  2022       PLACER     CALIFORNIA    95602     39.00    -121.09   
...    ...          ...            ...      ...       ...        ...   
5192  2022      WHATCOM     WASHINGTON    98220     48.70    -122.19   
5193  2022     HARRISON  WEST VIRGINIA    26301     39.28     -80.33   
5195  2022      RITCHIE  WEST VIRGINIA    26148     39.07     -81.18   
5204  2022  HOT SPRINGS        WYOMING    82430     43.82    -108.18   
5205  2022       ALBANY        WYOMING    82051     41.58    -105.69   

     Plant Hardiness Zone Corn Acres Harvested  Sales ($)   CV  State ANSI  \
4925                   8b                  (D)          0

In [66]:
109/5165

0.021103581800580834

## keep first instance of duplicate, drop the other

In [77]:
df_corn_nodups = df_corn_dropdatacomm.drop_duplicates(keep='first')

In [78]:
df_nodups = df_corn_nodups[df_corn.duplicated()]
#no output means no duplicates

  df_nodups = df_corn_nodups[df_corn.duplicated()]


In [79]:
df_nodups.describe()

Unnamed: 0,Year,latitude,longitude,Corn Acres Harvested,Sales ($),CV,State ANSI,Ag District Code
count,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
mean,,,,,,,,
std,,,,,,,,
min,,,,,,,,
25%,,,,,,,,
50%,,,,,,,,
75%,,,,,,,,
max,,,,,,,,


In [83]:
df_corn_nodups.describe()

Unnamed: 0,Year,latitude,longitude,Corn Acres Harvested,Sales ($),CV,State ANSI,Ag District Code
count,5064.0,5064.0,5064.0,5064.0,5064.0,5064.0,5064.0,5064.0
mean,2017.466035,38.420087,-90.786556,35800.44952,30470400.0,15.393912,30.382504,48.92575
std,4.978725,4.625367,10.063754,50604.36272,52272780.0,11.46441,14.510079,25.229518
min,2012.0,24.82,-124.15,3.0,0.0,0.0,1.0,10.0
25%,2012.0,35.115,-96.74,1899.75,533500.0,8.4,19.0,30.0
50%,2022.0,38.51,-89.37,12834.0,6652500.0,12.6,29.0,50.0
75%,2022.0,41.68,-83.5375,49619.929412,34914000.0,18.557692,42.0,70.0
max,2022.0,48.99,-67.76,353558.0,393787000.0,64.7,56.0,97.0


In [82]:
df_corn_nodups.dtypes

Year                      int64
County                   object
State                    object
Zip code                 object
latitude                float64
longitude               float64
Plant Hardiness Zone     object
Corn Acres Harvested    float64
Sales ($)                 int64
CV                      float64
State ANSI                int64
Ag District              object
Ag District Code          int64
Concatenated             object
dtype: object

#### counts are good, all lines have same number
#### year data is good- only two values available
#### Corn Acres Harvested has no null values
#### sales data is good, no null no errors
#### Coefficient of Variation is integer
#### CV, ANSI and District codes are aligned with data

## rename data set

In [84]:
df_corn_analysis = df_corn_nodups

In [85]:
print(df_corn_analysis)

      Year    County          State Zip code  latitude  longitude  \
0     2022     CREEK       OKLAHOMA    74010     35.83     -96.39   
1     2022     GRADY       OKLAHOMA    73002     34.92     -97.77   
2     2022   MCCLAIN       OKLAHOMA    73010     35.14     -97.65   
3     2022     PAYNE       OKLAHOMA    74023     35.97     -96.76   
4     2022     MAJOR       OKLAHOMA    73718     36.24     -98.18   
...    ...       ...            ...      ...       ...        ...   
5293  2022   KANAWHA  WEST VIRGINIA    25015     38.23     -81.53   
5294  2022   LINCOLN  WEST VIRGINIA    25003     38.28     -81.84   
5295  2022   ASHLAND      WISCONSIN    54514     46.01     -90.49   
5296  2022   NATRONA        WYOMING    82601     42.83    -106.32   
5297  2022  NIOBRARA        WYOMING    82222     43.20    -104.65   

     Plant Hardiness Zone  Corn Acres Harvested  Sales ($)   CV  State ANSI  \
0                      7b                 244.0      71000  0.3          40   
1            

## export

In [86]:
df_corn_analysis.to_csv(os.path.join(path, '02 Data','Cleaned', 'df_corn_analysis.csv'))