In [37]:
import pandas as pd

# apple_quality dataset adjustment

## data adaptation

In [38]:
# Load four feature columns (file positions 2, 3, 5 and 7) from the first
# 4000 data rows and map them onto small integer scales in a single chain.
# A draft formula that combined the features into a Quality column is not
# applied at this stage.
apple_quality = (
    pd.read_csv("apple_quality.csv", nrows=4000, usecols=[2, 3, 5, 7])
    .astype(int)       # whole numbers only
    .abs()             # drop the sign
    .add(2)            # shift upwards, so every value is at least 2
    .clip(1, 5)        # cap at 5
    # Weight is shifted down by one and capped at 3, giving it a 1..3 scale.
    .assign(Weight=lambda frame: frame["Weight"].sub(1).clip(1, 3))
    .rename(columns=str.lower)
)


In [39]:
# Shuffle the rows: frac=1 samples every row, random_state=8 pins the order.
apple_quality = apple_quality.sample(frac=1, random_state=8)
# Preview the shuffled frame; rows keep their original index labels.
apple_quality.head()

Unnamed: 0,weight,sweetness,juiciness,acidity
1975,2,4,2,3
3547,2,2,3,4
3498,3,4,4,5
705,1,2,2,3
416,3,2,2,3


In [40]:
from collections import deque
import statistics 

# Window over the 10 most recent *quality* scores (despite the "sweet" in the
# name, sweetness values themselves are never stored in it).
avg_sweet10 = deque(maxlen=10)
# Mean of that window; stays 0 until the first row has been processed.
avg_sweet = 0
# One quality score per row, collected in the frame's current row order.
qual = []

# Every score feeds the window used by the following rows, so the result
# depends on the order in which the rows are visited.
for _, row in apple_quality.iterrows():
    # Sweetness relative to the recent-score mean, plus acidity floor-divided by weight.
    quality = (row['sweetness'] - avg_sweet) + (row['acidity'] // row['weight'])
    # Disabled alternative formula:
    # quality = (row['sweetness']) - (avg_sweet * (row['juiciness']) - (row['acidity'] // row['weight'])))
    qual.append(quality)
    avg_sweet10.append(quality)
    # NOTE(review): the quality values displayed further down are all whole
    # numbers, so this mean apparently does not stay a float for the value
    # types coming out of iterrows() — confirm the truncation is intended.
    avg_sweet = statistics.mean(avg_sweet10)


In [41]:
# Attach the scores as a new column; `qual` is a plain list, so its values are
# assigned to the rows by position.
apple_quality['quality'] = qual

In [42]:
# Inspect the first ten rows together with the quality column.
apple_quality.head(10)

Unnamed: 0,weight,sweetness,juiciness,acidity,quality
1975,2,4,2,3,5
3547,2,2,3,4,-1
3498,3,4,4,5,3
705,1,2,2,3,3
416,3,2,2,3,1
1997,2,2,2,3,1
319,2,2,4,4,2
3443,1,2,2,3,3
732,1,5,5,3,6
1036,2,3,2,2,2


In [43]:
# apple_quality = apple_quality.drop('quality', axis=1)
# apple_quality.to_csv("apples.csv")

# possible quality measurement 

## division into "train" and "prod"

In [44]:
# Preview the frame while the quality column is still attached.
apple_quality.head()

Unnamed: 0,weight,sweetness,juiciness,acidity,quality
1975,2,4,2,3,5
3547,2,2,3,4,-1
3498,3,4,4,5,3
705,1,2,2,3,3
416,3,2,2,3,1


apple_quality.head()

In [45]:
# Separate the label from the features: keep the quality scores in `qual_col`
# and write the feature-only frame (index included) to apples.csv.
# All rows are kept; a `quality >= 0` row filter is left disabled.
qual_col = apple_quality['quality']
apple_quality = apple_quality.drop(columns='quality')
apple_quality.to_csv("apples.csv")
apple_quality.head(10)

Unnamed: 0,weight,sweetness,juiciness,acidity
1975,2,4,2,3
3547,2,2,3,4
3498,3,4,4,5
705,1,2,2,3
416,3,2,2,3
1997,2,2,2,3
319,2,2,4,4
3443,1,2,2,3
732,1,5,5,3
1036,2,3,2,2


In [46]:
import csv
import struct

def csv_to_binary(csv_file, binary_file):
    """Pack the integer cells of a CSV file into a newline-delimited binary file.

    The first CSV line is treated as a header and skipped, and the first
    column of every remaining row is ignored. Every other cell is parsed as
    an integer and stored as a single byte; each record is terminated by
    ``b'\\n'``. An empty input file produces an empty output file.

    Args:
        csv_file: Path of the CSV file to read.
        binary_file: Path of the binary file to create or overwrite.

    Raises:
        ValueError: If a cell is not an integer or does not fit in one byte
            (0-255).

    Note:
        A cell equal to 10 is written as the same byte as the record
        terminator, so a line-based reader cannot tell the two apart.
    """
    # newline='' leaves line-ending handling to the csv module.
    with open(csv_file, 'r', newline='') as f, open(binary_file, 'wb') as bf:
        reader = csv.reader(f)
        # Skip the header; the default keeps an empty file from raising StopIteration.
        next(reader, None)
        for row in reader:
            # row[0] is skipped; bytes() stores each remaining value as one
            # byte and rejects anything outside 0..255.
            bf.write(bytes(int(cell) for cell in row[1:]))
            bf.write(b'\n')

# Example usage: convert the feature CSV 'apples.csv' into 'apples.bin'.
csv_to_binary('apples.csv', 'apples.bin')

In [47]:
import csv
import struct

def binary_to_csv(binary_file):
    """Print each record of a newline-delimited binary file as a list of ints.

    Every record is a run of one-byte values followed by ``b'\\n'``. One list
    is printed per record, in file order, whatever the number of values in
    the record; records with no values are skipped. Nothing is returned.

    Args:
        binary_file: Path of the binary file to read.

    Note:
        Records are split on the newline byte, so a stored value of 10 is
        indistinguishable from a record terminator.
    """
    with open(binary_file, 'rb') as bf:
        for line in bf:
            # Remove only the terminator; a final record without one keeps all its bytes.
            record = line[:-1] if line.endswith(b'\n') else line
            if record:
                # Iterating a bytes object yields ints, one per stored value.
                print(list(record))

# Example usage: print every record stored in 'apples.bin'.
binary_to_csv('apples.bin')


[2, 4, 2, 3]
[2, 2, 3, 4]
[3, 4, 4, 5]
[1, 2, 2, 3]
[3, 2, 2, 3]
[2, 2, 2, 3]
[2, 2, 4, 4]
[1, 2, 2, 3]
[1, 5, 5, 3]
[2, 3, 2, 2]
[2, 3, 3, 5]
[1, 2, 5, 2]
[2, 2, 4, 3]
[2, 3, 4, 4]
[2, 2, 2, 3]
[1, 2, 2, 4]
[3, 3, 3, 4]
[1, 4, 2, 4]
[3, 3, 2, 3]
[2, 4, 2, 2]
[2, 5, 2, 4]
[2, 4, 4, 3]
[3, 2, 2, 4]
[1, 2, 4, 4]
[2, 4, 5, 4]
[1, 3, 3, 2]
[1, 3, 2, 3]
[1, 3, 3, 5]
[1, 2, 2, 3]
[1, 4, 3, 2]
[1, 4, 3, 3]
[1, 2, 2, 4]
[1, 2, 3, 2]
[2, 3, 3, 3]
[1, 4, 3, 2]
[1, 2, 3, 4]
[2, 4, 2, 2]
[3, 3, 2, 5]
[3, 5, 4, 3]
[1, 4, 2, 3]
[2, 5, 5, 3]
[1, 3, 3, 3]
[2, 2, 4, 2]
[3, 2, 5, 4]
[3, 5, 4, 2]
[2, 2, 2, 2]
[2, 3, 2, 3]
[3, 3, 2, 4]
[2, 5, 3, 2]
[2, 5, 2, 2]
[1, 2, 2, 3]
[2, 2, 5, 5]
[1, 5, 2, 4]
[1, 4, 4, 4]
[1, 2, 3, 3]
[3, 5, 5, 4]
[2, 4, 5, 3]
[2, 2, 3, 4]
[3, 3, 3, 2]
[3, 2, 3, 4]
[3, 5, 2, 3]
[2, 2, 2, 3]
[3, 2, 2, 2]
[2, 2, 3, 3]
[2, 5, 3, 2]
[3, 5, 2, 2]
[1, 5, 2, 4]
[3, 3, 5, 3]
[2, 2, 3, 5]
[2, 4, 3, 3]
[3, 5, 4, 2]
[3, 3, 4, 5]
[3, 5, 5, 4]
[2, 4, 5, 5]
[3, 2, 2, 3]
[3, 2, 2, 3]
[3, 3, 2, 3]

In [48]:
import csv

def row_to_bytearray(csv_row):
    """Serialize a ``[name, integer]`` row as a length-prefixed name plus a signed integer.

    Layout of the result, all big-endian:

    1. the UTF-8 byte length of the name, in as few bytes as it needs
       (at least one);
    2. the UTF-8 encoded name;
    3. the integer in two's complement, in as few bytes as it needs
       (at least one).

    Args:
        csv_row: Sequence whose item 0 is the name string and whose item 1 is
            an integer or anything ``int()`` accepts.

    Returns:
        bytearray: The concatenated record, without any terminator.
    """
    csv_string = csv_row[0]
    csv_integer = int(csv_row[1])

    string_bytes = csv_string.encode('utf-8')
    # The prefix counts encoded bytes, not characters, so it stays correct
    # for non-ASCII names; max(1, ...) keeps a prefix even for an empty name.
    string_length = len(string_bytes)
    string_length_bytes = string_length.to_bytes(
        max(1, (string_length.bit_length() + 7) // 8), 'big'
    )

    # Two's complement needs one sign bit on top of the magnitude bits. For a
    # negative n the magnitude bits are those of ~n (so -128 still fits in one
    # byte). "+ 8" adds the sign bit and rounds up to whole bytes, which also
    # gives 0 one byte instead of none.
    magnitude = csv_integer if csv_integer >= 0 else ~csv_integer
    integer_bytes = csv_integer.to_bytes(
        (magnitude.bit_length() + 8) // 8, 'big', signed=True
    )

    return bytearray(string_length_bytes + string_bytes + integer_bytes)

# Example usage: pair five apple names with quality scores and serialize each pair.
# NOTE(review): the sample values listed in the string below do not match the
# first five quality values displayed earlier in the notebook — confirm which
# set is current.
'''
Apple 1 ->        3
Apple 2 ->        2
Apple 3 ->        2
Apple 4 ->       -2
Apple 5 ->        0
'''
# NOTE(review): "Forth Apple" presumably means "Fourth Apple"; left unchanged
# because the name's bytes end up in the serialized records.
apples = ["First Apple", "Second Apple","Third Apple","Forth Apple", "Fifth Apple"]

# zip stops at the shorter input, so only the first len(apples) scores are used.
# Comprehensions keep their loop variables local, which avoids rebinding the
# notebook-level `qual` list to a single score.
csv_rows = [[name, score] for name, score in zip(apples, qual_col)]

byte_arrays = [row_to_bytearray(pair) for pair in csv_rows]
    


In [49]:
# Write the serialized records to the reference file, each followed by a newline byte.
with open("apples.ref.bin", 'wb') as ref_file:
    for record in byte_arrays:
        # bytearray + bytes concatenates, so record and terminator go out in one call.
        ref_file.write(record + b'\n')