# Python for Meteorology, Oceanography, and Climate

## Numerical Analysis with NumPy:

In [2]:
import numpy as np
import pandas as pd

### Array operations (creation, indexing, slicing)

In [3]:
df = pd.read_csv('dataset/data_extended.csv')
df.head()

Unnamed: 0,EmployeeID,Age,City,Salary,Experience,Performance,BonusPercentage,DistanceFromOffice
0,1,28,New York,60000,5,88,5.0,15
1,2,32,London,75000,7,92,7.5,25
2,3,25,Paris,55000,3,80,3.0,8
3,4,38,Tokyo,90000,10,95,10.0,30
4,5,29,New York,65000,6,90,6.0,18


In [4]:
# Extract relevant columns as NumPy arrays
ages = np.array(df['Age'])
salaries = np.array(df['Salary'])
experience = np.array(df['Experience'])
performance = np.array(df['Performance'])
bonus = np.array(df['BonusPercentage'])
distance = np.array(df['DistanceFromOffice'])

In [6]:
# Array Creation
zeros_array = np.zeros(5)
ones_array = np.ones(3)
range_array = np.arange(10, 20, 2)
random_array = np.random.rand(2, 3) # 2x3 array of random numbers between 0 and 1.

print("Zeros Array:", zeros_array)
print("Ones Array:", ones_array)
print("Range Array:", range_array)
print("Random Array:\n", random_array)

Zeros Array: [0. 0. 0. 0. 0.]
Ones Array: [1. 1. 1.]
Range Array: [10 12 14 16 18]
Random Array:
 [[0.1815091  0.87061182 0.73883749]
 [0.45447048 0.51965378 0.57631039]]


In [7]:
# Indexing
print("First Age:", ages[0])
print("Last Salary:", salaries[-1])

First Age: 28
Last Salary: 80000


In [8]:
# Slicing
print("Ages 2 to 4:", ages[1:4])
print("Salaries from the beginning to the 3rd element:", salaries[:3])


Ages 2 to 4: [32 25 38]
Salaries from the beginning to the 3rd element: [60000 75000 55000]


In [9]:
# Boolean Indexing
high_performers = salaries[performance > 90]
print("Salaries of High Performers:", high_performers)

Salaries of High Performers: [75000 90000 78000 95000 80000]


In [11]:
# Reshaping
print(experience)
reshaped_experience = experience.reshape((2, 5)) #Reshapes experience array into a 2x5 matrix
print("Reshaped Experience:\n", reshaped_experience)

[ 5  7  3 10  6  8  4 12  5  9]
Reshaped Experience:
 [[ 5  7  3 10  6]
 [ 8  4 12  5  9]]


### Linear algebra operations (matrix multiplication, inversion)

In [12]:
# Matrix Multiplication
matrix_a = np.array([[1, 2], [3, 4]])
matrix_b = np.array([[5, 6], [7, 8]])
result_matrix = np.matmul(matrix_a, matrix_b)
print("Matrix Multiplication Result:\n", result_matrix)

Matrix Multiplication Result:
 [[19 22]
 [43 50]]


In [13]:
# Matrix Inversion (create a matrix that is invertable)
invertable_matrix = np.array([[2, 1], [5, 3]])
try:
    inverse_matrix = np.linalg.inv(invertable_matrix)
    print('Inverse Matrix:\n', inverse_matrix)
except np.linalg.LinAlgError:
    print('Matrix is not invertible.')

Inverse Matrix:
 [[ 3. -1.]
 [-5.  2.]]


In [14]:
# Solving Linear Equations
coefficients = np.array([[2, 1], [5, 3]])
constants = np.array([1, 2])
solution = np.linalg.solve(coefficients, constants)
print("Solution of Linear Equations:\n", solution)

Solution of Linear Equations:
 [ 1. -1.]


### Numerical integration and differentiation

In [16]:
# Example function: f(x) = x^2 / 10 (using age as x)
def f(x):
    return x**2 / 10

age_integration_x = np.linspace(ages.min(), ages.max(), 100)
age_integration_y = f(age_integration_x)
age_integral = np.trapezoid(age_integration_y, age_integration_x)
print("Numerical Integration (Age Function):", age_integral)

# Numerical Differentiation (example: rate of change of salary with respect to experience)
dx = experience[1] - experience[0]
salary_derivative = np.gradient(salaries, dx)
print("Numerical Differentiation (Salary vs. Experience):\n", salary_derivative)

Numerical Integration (Age Function): 1612.505739210285
Numerical Differentiation (Salary vs. Experience):
 [ 7500. -1250.  3750.  2500. -3000. -1750.  4250.  1000. -3750.  9000.]


In [17]:
#Example with a different function.
def g(x):
  return np.sin(x) * np.exp(-x/10)

x_vals2 = np.linspace(0, 10, 200)
y_vals2 = g(x_vals2)
integral_result2 = np.trapezoid(y_vals2, x_vals2)
derivative_result2 = np.gradient(y_vals2, x_vals2[1] - x_vals2[0])

print("\nIntegral of g(x):", integral_result2)
print("\nDerivative of g(x) first 5:", derivative_result2[:5])


Integral of g(x): 1.3152640438205485

Derivative of g(x) first 5: [0.99456878 0.9883343  0.97468184 0.958693   0.94043173]


### Advanced Example JAX

In [21]:
import jax
import jax.numpy as jnp
import pandas as pd

# Extract features and target variable
features = jnp.array(df[['Age', 'Experience', 'Performance', 'BonusPercentage', 'DistanceFromOffice']])
target = jnp.array(df['Salary'])

# Add a bias term (intercept)
features = jnp.concatenate([jnp.ones((features.shape[0], 1)), features], axis=1)

# Linear regression function using JAX
def linear_regression(features, target):
    """Computes linear regression using JAX."""
    weights = jnp.linalg.lstsq(features, target)[0]
    return weights

# Calculate the weights
weights = linear_regression(features, target)

# Print the weights
print("Weights:", weights)

# Function to predict salary
def predict_salary(features, weights):
    """Predicts salary using the learned weights."""
    features = jnp.concatenate([jnp.ones((features.shape[0], 1)), features], axis=1) # add bias term
    return jnp.dot(features, weights)

# Example prediction for a new employee
new_employee = jnp.array([[30, 6, 90, 6.0, 20]])  # Example features
predicted_salary = predict_salary(new_employee, weights)
print("Predicted Salary:", predicted_salary[0])

# Example prediction for all employees in the dataset.
all_predictions = predict_salary(features[:, 1:], weights) #features[:,1:] to avoid the bias column.
print("Predicted Salaries for all employees:\n", all_predictions)

# Compute Mean Absolute Error (MAE)
def calculate_mae(predictions, actual):
    """Calculates the Mean Absolute Error."""
    return jnp.mean(jnp.abs(predictions - actual))

mae = calculate_mae(all_predictions, target)
print("Mean Absolute Error (MAE):", mae)

Weights: [10144.317    2202.6875   -611.4849   -178.81647  1500.8699    131.59868]
Predicted Salary: 68099.75
Predicted Salaries for all employees:
 [62504.63  74445.31  54627.133 89700.69  65633.87  76213.33  57624.773
 96364.01  61720.426 79165.74 ]
Mean Absolute Error (MAE): 900.5102
