# **Code Used In NumPy Practical Examples: Useful Techniques**

In [None]:
# Uncomment the lines below if NumPy or Matplotlib are not installed yet.
# !python -m pip install numpy
# !python -m pip install matplotlib

**Example 1: Creating Multi-Dimensional Arrays From Files**

In [None]:
import numpy as np
from pathlib import Path

# Pre-allocate the 3-D result so each CSV file can be written into its own
# layer. Every file must hold data that fits a 2x3 layer.
array = np.zeros((3, 2, 3))
print(id(array))

# glob() yields matches in arbitrary order, so sort them to make the
# file-to-layer mapping deterministic.
csv_files = sorted(Path.cwd().glob("file?.csv"))
for file_count, csv_file in enumerate(csv_files):
    array[file_count] = np.loadtxt(csv_file.name, delimiter=",")

# Same id as before: the layers were filled in place, not rebound.
print(id(array))
print(array.shape)
array

In [None]:
# One extra layer (index 3) is reserved for short_file.csv; the file?.csv
# data fills the leading layers.
array = np.zeros((4, 2, 3))

csv_files = sorted(Path.cwd().glob("file?.csv"))
for file_count, csv_file in enumerate(csv_files):
    array[file_count] = np.loadtxt(csv_file.name, delimiter=",")

# Only row 0 of the last layer is overwritten; its second row stays zero.
short_row = np.loadtxt("short_file.csv", delimiter=",")
array[3, 0] = short_row

array

In [None]:
array = np.zeros((4, 2, 3))
print(id(array))

csv_files = sorted(Path.cwd().glob("file?.csv"))
for file_count, csv_file in enumerate(csv_files):
    array[file_count] = np.loadtxt(csv_file.name, delimiter=",")

# np.insert() returns a new array with zeros added at index 2 along axis 1,
# so every layer grows from 2x3 to 3x3 and can take long_file.csv's data.
array = np.insert(array, 2, 0, axis=1)
long_layer = np.loadtxt("long_file.csv", delimiter=",")
array[3] = long_layer

# Different id this time: `array` now names the copy made by np.insert().
print(id(array))
array

**Example 2: Reconciling Data Using Structured NumPy Arrays**

In [None]:
import numpy as np

# Field layout of one record: name (up to 12 characters), 32-bit float
# price and 32-bit integer finishing position.
race_dtype = [
    ("horse_name", "U12"),
    ("price", "f4"),
    ("position", "i4"),
]
race_rows = [
    ("At The Back", 1.2, 3),
    ("Fast Eddie", 1.3, 1),
    ("Almost There", 1.1, 2),
]
race_results = np.array(race_rows, dtype=race_dtype)

# Indexing by field name returns that field for every record.
race_results["horse_name"]

In [None]:
# Order the records by finishing position, then keep only name and price.
results_by_position = np.sort(race_results, order="position")
results_by_position[["horse_name", "price"]]

In [None]:
# Boolean mask picks the record(s) whose position is 1, then reads the name.
is_winner = race_results["position"] == 1
race_results[is_winner]["horse_name"]

In [None]:
import numpy.lib.recfunctions as rfn
from pathlib import Path

# Field layouts of the two CSV files. Both declare "id" and "amount".
issued_dtypes = [
    ("id", "i8"),
    ("payee", "U10"),
    ("amount", "f8"),
    ("date_issued", "U10"),
]
cashed_dtypes = [
    ("id", "i8"),
    ("amount", "f8"),
    ("date_cashed", "U10"),
]

# skiprows=1 skips the first line of each file (presumably a header row).
issued_checks = np.loadtxt(
    Path("issued_checks.csv"),
    dtype=issued_dtypes,
    delimiter=",",
    skiprows=1,
)
cashed_checks = np.loadtxt(
    Path("cashed_checks.csv"),
    dtype=cashed_dtypes,
    delimiter=",",
    skiprows=1,
)

# Inner join on "id": only checks present in both arrays are kept.
cashed_check_details = rfn.rec_join(
    "id",
    issued_checks,
    cashed_checks,
    jointype="inner",
)
cashed_check_details[["payee", "date_issued", "date_cashed"]]

In [None]:
# The following raises an error because the joined array has no "amount"
# field: both inputs had one, so the join renamed it (see "amount1" below).
#
# cashed_check_details[["payee", "date_issued", "date_cashed", "amount"]]

In [None]:
# "amount1" is the name the amount field carries in the joined array.
report_fields = ["payee", "date_issued", "date_cashed", "amount1"]
cashed_check_details[report_fields]

In [None]:
# Collect the ids of checks that were issued but never show up as cashed.
cashed_ids = cashed_checks["id"]
outstanding_checks = []
for check_id in issued_checks["id"]:
    if check_id not in cashed_ids:
        outstanding_checks.append(check_id)

# Convert the NumPy integers to plain ints for display.
list(map(int, outstanding_checks))

In [None]:
# The reverse check: ids that were cashed without ever having been issued.
issued_ids = issued_checks["id"]
[check_id for check_id in cashed_checks["id"] if check_id not in issued_ids]

**Example 3: Analyzing and Charting Hierarchical Data**

In [None]:
import numpy as np
from pathlib import Path

# Build the record layout: two text fields describing the company, followed
# by one 64-bit float field per weekday.
days = ["mon", "tue", "wed", "thu", "fri"]
company_dtype = [("company", "U20"), ("sector", "U20")]
days_dtype = list(zip(days, ["f8"] * len(days)))

# Six empty records (blank strings, 0.0 prices) to be filled in later.
portfolio_dtype = np.dtype(company_dtype + days_dtype)
portfolio = np.zeros(6, dtype=portfolio_dtype)
portfolio

In [None]:
# Read the company/sector pairs, skipping the file's first line.
company_rows = np.loadtxt(
    Path("portfolio.csv"),
    dtype=company_dtype,
    delimiter=",",
    skiprows=1,
)
# Force the 1-D, six-record shape that `portfolio` was allocated with.
companies = company_rows.reshape((6,))

# Copy both text fields into the matching fields of every portfolio record.
portfolio[["company", "sector"]] = companies
portfolio

In [None]:
# Each daily file holds a company name and that day's price.
share_prices_dtype = [("company", "U20"), ("day", "f8")]

# Pair the weekday field names with the sorted daily files, so the mapping
# depends on the files' sort order matching mon..fri.
price_files = sorted(Path.cwd().glob("share_prices-?.csv"))
for day, csv_file in zip(days, price_files):
    daily_prices = np.loadtxt(
        csv_file.name,
        dtype=share_prices_dtype,
        delimiter=",",
        skiprows=1,
    )
    # Only the price column is kept; rows are assumed to line up with
    # `portfolio` by position — TODO confirm.
    portfolio[day] = daily_prices["day"]

portfolio

In [None]:
# Keep only the record(s) whose company field equals "Company_C".
is_company_c = portfolio["company"] == "Company_C"
filtered_array = portfolio[is_company_c]
filtered_array

In [None]:
# Friday prices of the records in the technology sector.
portfolio["fri"][portfolio["sector"] == "technology"]

In [None]:
# Scale each technology Friday price by 250 and then by 0.01 (presumably a
# holding of 250 shares and a cents-to-dollars conversion — confirm).
tech_friday_prices = portfolio[portfolio["sector"] == "technology"]["fri"]
tech_friday_prices * 250 * 0.01

In [None]:
# Total of the scaled Friday prices across the technology sector.
tech_valuations = portfolio[portfolio["sector"] == "technology"]["fri"] * 250 * 0.01
sum(tech_valuations)

In [None]:
import matplotlib.pyplot as plt

# Build the technology mask once and reuse it for both labels and heights.
tech_mask = portfolio["sector"] == "technology"
tech_sector = portfolio[tech_mask]["company"]
tech_valuation = portfolio[tech_mask]["fri"] * 250 * 0.01

# x and height are passed as arrays, so the data= lookup argument (which only
# resolves string keys) is not needed and has been dropped.
bars = plt.bar(x=tech_sector, height=tech_valuation)
bars[0].set_color("g")  # draw the first bar in green

plt.xlabel("Tech Companies")
plt.ylabel("Friday Price ($)")
plt.title("Tech Share Valuation ($)")
plt.show()

In [None]:
# One integer temperature field per city, alongside a shared month field.
cities = ["london", "new_york", "rome"]
city_files_dtype = [("month", "U20"), ("temp", "i8")]
cities_dtype = [(city_name, "i8") for city_name in cities]
weather_data_dtype = np.dtype([("month", "U20"), *cities_dtype])

# Twelve empty records, one per row of the city files.
weather_data = np.zeros(12, dtype=weather_data_dtype)

for city in cities:
    city_file = Path(f"{city}_temperatures.csv")
    temps = np.loadtxt(city_file, dtype=city_files_dtype, delimiter=",")
    # Multi-field assignment copies by position: month -> month, temp -> city.
    # The month field is therefore rewritten on every pass.
    weather_data[["month", city]] = temps

weather_data

**Example 4: Writing Your Own Vectorization Functions**

In [None]:
import numpy as np
from pathlib import Path

# Record layout of full_portfolio.csv: two text fields, then one 64-bit
# float price field per weekday.
weekday_fields = [(day_name, "f8") for day_name in ("mon", "tue", "wed", "thu", "fri")]
share_dtypes = [("company", "U20"), ("sector", "U20")] + weekday_fields

# skiprows=1 skips the file's first line.
portfolio = np.loadtxt(
    Path("full_portfolio.csv"),
    dtype=share_dtypes,
    delimiter=",",
    skiprows=1,
)

# Element-wise change in price between Monday and Friday.
portfolio["fri"] - portfolio["mon"]

In [None]:
def profit_with_bonus(first_day, last_day):
    """Return the price change, multiplied by 1.1 when a bonus is earned.

    The bonus applies when ``last_day`` is at least 1.01 times ``first_day``.
    Only scalar arguments work: the conditional needs a single truth value.
    """
    profit = last_day - first_day
    earns_bonus = last_day >= first_day * 1.01
    return profit * 1.1 if earns_bonus else profit


# The following causes an error because profit_with_bonus() doesn't know
# how to work with NumPy arrays:
#
# profit_with_bonus(portfolio["mon"], portfolio["fri"])

In [None]:
def profit_with_bonus(first_day, last_day):
    """Return ``last_day - first_day``, boosted by a factor of 1.1 on a bonus.

    A bonus is earned once ``last_day`` reaches 1.01 times ``first_day``.
    Takes scalars only; wrap it with ``np.vectorize`` to apply it to arrays.
    """
    bonus_threshold = first_day * 1.01
    if last_day >= bonus_threshold:
        return (last_day - first_day) * 1.1
    return last_day - first_day


# Wrap the scalar function so NumPy applies it element by element.
# otypes pins the result to float. Without it np.vectorize infers the output
# dtype from the first result, which truncates later bonus values when that
# first result is an int, and it raises on empty input.
vectorized_profit_with_bonus = np.vectorize(profit_with_bonus, otypes=[float])
vectorized_profit_with_bonus(portfolio["mon"], portfolio["fri"])

In [None]:
# The undecorated function still accepts plain scalars.
profit_with_bonus(first_day=3, last_day=5)

In [None]:
@np.vectorize
def profit_with_bonus(first_day, last_day):
    """Return the price change, multiplied by 1.1 when a bonus is earned.

    The bonus applies when ``last_day`` is at least 1.01 times ``first_day``.
    Decorated with ``np.vectorize``, so it accepts scalars or arrays and
    always returns a NumPy array.

    The profit is converted to ``float`` on both branches. ``np.vectorize``
    infers the output dtype from the first result, so an integer first
    result would otherwise truncate every later bonus value.
    """
    profit = float(last_day - first_day)
    if last_day >= first_day * 1.01:
        return profit * 1.1
    return profit


# The decorated function can now take the Monday and Friday columns directly.
monday_prices, friday_prices = portfolio["mon"], portfolio["fri"]
profit_with_bonus(monday_prices, friday_prices)

In [None]:
# Scalars still work after decoration, but the result is a NumPy array.
profit_with_bonus(first_day=3, last_day=5)

In [None]:
# Array-native form of profit_with_bonus(): where Friday's price is at least
# 1.01 times Monday's, take the profit multiplied by 1.1, otherwise the plain
# profit. The comparison is >= so the threshold case matches the function.
np.where(
    portfolio["fri"] >= portfolio["mon"] * 1.01,
    (portfolio["fri"] - portfolio["mon"]) * 1.1,
    portfolio["fri"] - portfolio["mon"],
)