In [10]:
import numpy as np

### Task 1
1. Load two arrays from the files `data1.csv` and `data2.csv` (assuming they have the same size).  
2. Perform addition, multiplication, and division of these arrays element-wise.  
3. For all elements in the arrays that fall outside the range $[a, b]$, set them to zero.  
4. Divide the remaining elements by the average of the array elements.  
5. Save the results into three separate files:
   - `out1.csv` for addition,
   - `out2.csv` for multiplication,
   - `out3.csv` for division.


In [11]:
def zero_and_normalize(array: np.ndarray, a: float, b: float) -> np.ndarray:
    """Zero every element outside ``[a, b]`` and divide the rest by their mean.

    The array is modified in place and the very same object is returned, so
    the caller's variable reflects the result as well.

    Range membership is tracked with an explicit mask instead of testing
    ``!= 0`` after the zeroing step. That matters in two situations:

    * NaN compares false against both bounds, so a "below a or above b" test
      would leave it untouched and it would then turn the mean (and every
      normalized value) into NaN. Here NaN counts as out of range.
    * A genuine 0 that lies inside ``[a, b]`` is a remaining element and is
      included in the mean.

    Args:
        array: Values to process. True division is applied in place, so this
            is meant for floating-point arrays.
        a: Lower bound of the range (inclusive).
        b: Upper bound of the range (inclusive).

    Returns:
        ``array`` itself after zeroing and normalization. The division is
        skipped when no element is in range or when the in-range mean is 0.
    """
    # Positive formulation on purpose: any comparison with NaN is False,
    # so NaN ends up outside the mask and gets zeroed below.
    in_range = (array >= a) & (array <= b)
    array[~in_range] = 0

    if in_range.any():
        mean_value = array[in_range].mean()
        # A zero mean would only produce inf/NaN; leave the values as they are.
        if mean_value != 0:
            array[in_range] /= mean_value

    return array

def process_matrix(input_file_1: str, input_file_2: str, output_file_1: str, output_file_2: str, output_file_3: str, ab_range: list) -> None:
    """Combine two CSV matrices element-wise, normalize and save the results.

    The two input files are loaded as comma-separated numeric matrices of the
    same shape. Their element-wise sum, product and quotient are printed,
    passed through ``zero_and_normalize`` with the bounds from ``ab_range``,
    printed again and written to the three output files.

    Args:
        input_file_1: Path of the first CSV file (left operand).
        input_file_2: Path of the second CSV file (right operand / divisor).
        output_file_1: Destination for the normalized sum.
        output_file_2: Destination for the normalized product.
        output_file_3: Destination for the normalized quotient.
        ab_range: Two values ``[a, b]`` giving the inclusive range to keep.

    Returns:
        None. A shape mismatch is reported with a message and nothing is
        written. Any exception is reported as ``Error: ...`` instead of being
        raised (best-effort behaviour for interactive use).
    """
    try:
        # ndmin=2 keeps a single-row file as a 1 x n matrix, so it is written
        # back as one row instead of being flattened into a column.
        array1 = np.loadtxt(input_file_1, delimiter=',', ndmin=2)
        array2 = np.loadtxt(input_file_2, delimiter=',', ndmin=2)

        # Element-wise operations below require identical dimensions
        if array1.shape != array2.shape:
            print("Arrays should have the same shape")
            return

        # Unpack the range before doing any work so a malformed range fails early
        a, b = ab_range

        # Zeros in array2 give inf/nan in the quotient under NumPy's default
        # floating-point settings.
        raw_results = (array1 + array2, array1 * array2, array1 / array2)

        # Display the intermediate results
        print('---INPUT---')
        for raw in raw_results:
            print(raw)

        # Normalize copies and keep the returned arrays: what gets saved is
        # then explicitly the normalized data, whether or not the helper
        # works in place.
        normalized_results = [zero_and_normalize(raw.copy(), a, b) for raw in raw_results]

        # Display the final normalized results
        print('---OUTPUT---')
        for normalized in normalized_results:
            print(normalized)

        # Save the normalized results: sum, product, quotient
        output_files = (output_file_1, output_file_2, output_file_3)
        for path, normalized in zip(output_files, normalized_results):
            np.savetxt(path, normalized, delimiter=',')

    except Exception as e:
        # Deliberately broad: report the problem and keep the notebook running
        print(f"Error: {e}")


# Run Task 1 on the sample inputs, keeping only values within [1, 10]
process_matrix(
    input_file_1='data2/data1.csv',
    input_file_2='data2/data2.csv',
    output_file_1='data2/out1.txt',
    output_file_2='data2/out2.txt',
    output_file_3='data2/out3.txt',
    ab_range=[1, 10],
)


---INPUT---
[[ 12.   14.   16. ]
 [  4.5   6.    8. ]
 [ 11.   16.  109. ]]
[[ 11.  24.  39.]
 [  2.   5.  12.]
 [ 28.  64. 900.]]
[[0.09090909 0.16666667 0.23076923]
 [8.         5.         3.        ]
 [1.75       1.         0.09      ]]
---OUTPUT---
[[0.         0.         0.        ]
 [0.72972973 0.97297297 1.2972973 ]
 [0.         0.         0.        ]]
[[0.         0.         0.        ]
 [0.57142857 1.42857143 0.        ]
 [0.         0.         0.        ]]
[[0.         0.         0.        ]
 [2.13333333 1.33333333 0.8       ]
 [0.46666667 0.26666667 0.        ]]


### Task 2
1. Load data from the file `data1.csv`, which contains a two-dimensional array of floating-point numbers.  
2. Select elements from specified columns that are greater than a given value.  
3. The resulting 2D array should contain numbers that serve as inputs to the function $f(x) = 2 \cdot x + 4$.  
4. Compute $f(x)$ for each element of the filtered array.  
5. Save the resulting matrix of $f(x)$ values to the file `res1.csv`.

In [12]:
def process_data(input_file: str, output_file: str, columns: list, threshold: int) -> None:
    """Apply f(x) = 2 * x + 4 to the large entries of selected CSV columns.

    Reads a comma-separated numeric matrix, keeps the requested columns,
    selects the entries strictly greater than ``threshold`` and writes the
    transformed values to ``output_file``. The column selection and the
    selected entries are printed for inspection.

    Boolean-mask selection yields a flat 1-D array (row-major order), so the
    saved result is a list of values rather than a matrix.

    Args:
        input_file: Path of the CSV file to read.
        output_file: Path of the CSV file to write.
        columns: Zero-based indices of the columns to consider.
        threshold: Entries must be strictly greater than this value.
    """
    # Load the matrix and narrow it to the requested columns in one step
    selected = np.loadtxt(input_file, delimiter=',')[:, columns]

    # Keep only the entries above the threshold (result is one-dimensional)
    above = selected[selected > threshold]

    # Show both stages so the selection can be checked by eye
    print(selected)
    print(above)

    # Evaluate f(x) = 2 * x + 4 and store it with fixed-point formatting
    np.savetxt(output_file, 4 + 2 * above, delimiter=',', fmt='%f')

# Run Task 2 on the sample file: columns 1 and 3, values greater than 5
process_data(
    input_file='data2/data3.csv',
    output_file='data2/out4.csv',
    columns=[1, 3],
    threshold=5,
)

[[2.2 4.4]
 [4.4 6.5]
 [6.6 2.2]
 [8.8 5.5]
 [1.  2.2]]
[6.5 6.6 8.8 5.5]


### Task 3
1. Assume there is an arbitrary number of `.csv` files named `data1.csv`, `data2.csv`, `data3.csv`, etc.  
2. Combine these files into a single large matrix, appending them in order. For example:  

   - `data1.csv`:
     
     $$
     \begin{bmatrix}
     1 & 2 \\
     3 & 4
     \end{bmatrix}
     $$
     
     
   - `data2.csv`:
     
     $$
     \begin{bmatrix}
     5 & 6 \\
     7 & 8
     \end{bmatrix}
     $$
     

   Combined matrix:
   
   $$
   \begin{bmatrix}
   1 & 2 \\
   3 & 4 \\
   5 & 6 \\
   7 & 8
   \end{bmatrix}
   $$
   

3. Identify rows in this combined matrix that have an average greater than a specified value.  
4. Save these rows as a new array to the file `out1.csv`.

In [13]:
import glob
import re

import numpy as np

def process_csv_files(threshold, pattern="data2/datax*.csv", output_file="data2/out5.csv"):
    """Stack numbered CSV files and save the rows whose mean exceeds a threshold.

    All files matching ``pattern`` are loaded as comma-separated numeric
    matrices and stacked vertically in natural (numeric) order, so that
    ``datax2.csv`` comes before ``datax10.csv``. Rows whose arithmetic mean is
    strictly greater than ``threshold`` are written to ``output_file``. The
    combined matrix and the selected rows are printed.

    Args:
        threshold: Rows are kept when their mean is strictly greater than this.
        pattern: Glob pattern selecting the input files.
        output_file: Path of the CSV file receiving the selected rows.

    Raises:
        ValueError: If no file matches ``pattern``, or if the files cannot be
            stacked (raised by ``np.vstack``, e.g. differing column counts).
    """
    def natural_key(path):
        # re.split with a capturing group alternates text, digits, text, ...;
        # the odd positions are the digit runs and are compared as integers.
        tokens = re.split(r"(\d+)", path)
        return [int(tok) if idx % 2 else tok for idx, tok in enumerate(tokens)]

    # Plain lexicographic sorting would put "datax10" before "datax2"
    file_list = sorted(glob.glob(pattern), key=natural_key)
    if not file_list:
        raise ValueError(f"No CSV files match {pattern!r}")

    # ndmin=2 keeps single-row and single-column files two-dimensional so
    # that their rows are stacked as rows
    data = [np.loadtxt(file, delimiter=",", ndmin=2) for file in file_list]

    # Combine all the data into a single matrix (vertically stack arrays)
    combined_data = np.vstack(data)

    # Compute the mean of each row
    row_means = np.mean(combined_data, axis=1)

    # Keep the rows whose mean is greater than the threshold
    filtered_rows = combined_data[row_means > threshold]

    # "%d" truncates fractional values, so it is used only when every value
    # is a whole number; otherwise the values are written with full precision.
    all_integral = bool(
        np.isfinite(filtered_rows).all()
        and (filtered_rows == np.round(filtered_rows)).all()
    )
    np.savetxt(
        output_file,
        filtered_rows,
        delimiter=",",
        fmt="%d" if all_integral else "%.15g",
    )

    print(combined_data)
    print('After filtering')
    print(filtered_rows)

# Example usage of the process_csv_files function: keep the rows whose mean is greater than 4
process_csv_files(threshold=4)


[[1. 2. 3.]
 [4. 5. 6.]
 [7. 8. 9.]
 [1. 2. 3.]
 [0. 0. 0.]
 [1. 2. 3.]
 [4. 5. 6.]
 [7. 8. 9.]
 [1. 2. 3.]
 [0. 0. 0.]
 [3. 2. 1.]]
After filtering
[[4. 5. 6.]
 [7. 8. 9.]
 [4. 5. 6.]
 [7. 8. 9.]]
