## Khai báo thư viện sử dụng

In [263]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')
from sklearn.decomposition import NMF

## Đọc bảng dữ liệu

In [266]:
Minerals_Database = pd.read_csv('Minerals_Database.csv')

Minerals_Database_filter = Minerals_Database[['Name', 'Magnesium', 'Calcium', 'Iron','Molar Mass']]
Minerals_Database_use = Minerals_Database_filter[(Minerals_Database_filter['Magnesium'] > 0) 
                                                | (Minerals_Database_filter['Calcium'] > 0) 
                                                | (Minerals_Database_filter['Iron'] > 0)]
Minerals_Database_sort = Minerals_Database_use.sort_values(by='Magnesium', ascending=False).head(10)

display(Minerals_Database_use)

Unnamed: 0,Name,Magnesium,Calcium,Iron,Molar Mass
4,Actinolite,4.0,1.0,4.0,861.185368
7,Adelite,1.0,1.0,0.0,251.283292
8,Admontite,2.0,0.0,0.0,407.639360
9,Aegirine,0.0,0.0,1.0,154.920468
10,Aenigmatite,4.0,1.0,7.0,1110.587536
...,...,...,...,...,...
3104,Zinnwaldite-2M1,0.0,0.0,3.0,699.777801
3106,Zirsinalite,0.0,1.0,0.0,269.364636
3107,Zoltaiite,1.0,0.0,2.0,677.090039
3110,Zwieselite,0.0,0.0,2.0,225.663765


# Tính phần trăm 

In [269]:
Minerals_Database_use = Minerals_Database_use.copy()

Minerals_Database_use['MgO_Percent'] = (40 * Minerals_Database_use['Magnesium'] * 100) / Minerals_Database_use['Molar Mass']
Minerals_Database_use['CaO_Percent'] = (56 * Minerals_Database_use['Calcium'] * 100) / Minerals_Database_use['Molar Mass']
Minerals_Database_use['FeO_Percent'] = (72 * Minerals_Database_use['Iron'] * 100) / Minerals_Database_use['Molar Mass']

Minerals_Database_use = Minerals_Database_use.drop(columns=['Magnesium', 'Calcium', 'Iron', 'Molar Mass'])

display(Minerals_Database_use)

Unnamed: 0,Name,MgO_Percent,CaO_Percent,FeO_Percent
4,Actinolite,18.579043,6.502665,33.442277
7,Adelite,15.918289,22.285604,0.000000
8,Admontite,19.625190,0.000000,0.000000
9,Aegirine,0.000000,0.000000,46.475460
10,Aenigmatite,14.406789,5.042376,45.381385
...,...,...,...,...
3104,Zinnwaldite-2M1,0.000000,0.000000,30.866941
3106,Zirsinalite,0.000000,20.789663,0.000000
3107,Zoltaiite,5.907634,0.000000,21.267482
3110,Zwieselite,0.000000,0.000000,63.811751


## Cách 1: giảm chiều bằng NMF

In [272]:
if "Name" in Minerals_Database_use.columns:
    Minerals_Database_use = Minerals_Database_use.drop(columns=["Name"])

V = Minerals_Database_use.values

nmf = NMF(n_components=3, init='random', random_state=40)

W = nmf.fit_transform(V) 
H = nmf.components_       

print("Ma trận cơ sở W:")
display(pd.DataFrame(W, columns=[f"Dimension {i+1}" for i in range(3)]))
print("Ma trận trọng số H:")
display(pd.DataFrame(H, columns=Minerals_Database_use.columns))

reconstructed_data = np.dot(W, H)
reconstructed_data_rounded = np.round(reconstructed_data, 5)
print("Dữ liệu tái tạo (reconstructed data):")
display(pd.DataFrame(reconstructed_data_rounded, columns=Minerals_Database_use.columns))

Ma trận cơ sở W:


Unnamed: 0,Dimension 1,Dimension 2,Dimension 3
0,15.422668,1.225292,2.372810
1,0.000000,4.199259,2.033192
2,0.000000,0.000000,2.506692
3,21.433217,0.000000,0.000000
4,20.928659,0.950131,1.839806
...,...,...,...
1809,14.234993,0.000000,0.000000
1810,0.000000,3.917380,0.000000
1811,9.807984,0.000000,0.754411
1812,29.428242,0.000000,0.000000


Ma trận trọng số H:


Unnamed: 0,MgO_Percent,CaO_Percent,FeO_Percent
0,4.4e-05,0.0,2.168385
1,1.5e-05,5.307032,0.0
2,7.829249,7.756732e-08,0.0


Dữ liệu tái tạo (reconstructed data):


Unnamed: 0,MgO_Percent,CaO_Percent,FeO_Percent
0,18.57802,6.50266,33.44228
1,15.91843,22.28560,0.00000
2,19.62552,0.00000,0.00000
3,0.00094,0.00000,46.47546
4,14.40524,5.04238,45.38138
...,...,...,...
1809,0.00063,0.00000,30.86694
1810,0.00006,20.78966,0.00000
1811,5.90690,0.00000,21.26748
1812,0.00129,0.00000,63.81175


## Cách 2 

In [275]:
Minerals_Database_percent['CO2'] = ( Minerals_Database_use['MgO_Percent'] * 1.092 
                                + Minerals_Database_use['CaO_Percent'] * 0.785 
                                + Minerals_Database_use['FeO_Percent'] * 0.859)

Minerals_Database_percent_sort = Minerals_Database_percent[['Name','MgO_Percent','CaO_Percent','FeO_Percent', 'CO2']].head(10)
display(Minerals_Database_percent_sort)

Unnamed: 0,Name,MgO_Percent,CaO_Percent,FeO_Percent,CO2
873,Antigorite-T,42.91981,0.0,0.0,46.868432
66,Antigorite,44.59062,0.0,0.0,48.692957
872,Antigorite-M,45.11776,0.0,0.0,49.268593
902,Arrojadite-(KNa),18.877308,2.032941,36.592935,53.643209
1603,Gottardiite,14.519609,20.327453,23.957356,52.391833
2796,Stornesite-(Y),21.757246,6.092029,35.246739,58.818105
1461,Filipstadite,28.311789,0.0,25.48061,52.804318
934,Bannisterite,11.041868,1.545862,19.875362,30.344157
2838,Takeuchiite,25.056912,0.0,0.0,27.362148
1867,Khmaralite,12.554887,0.0,47.708572,54.6916


In [277]:
Name = ['Antigorite-T', 'Antigorite', 'Arrojadite-(KNa)', 'Gottardiite', 'Khmaralite']

Minerals_Database_sort = Minerals_Database_percent_sort[Minerals_Database_percent_sort['Name'].isin(Name)]

Minerals_Database_sort['Name'] = pd.Categorical(Minerals_Database_sort['Name'], categories=Name, ordered=True)
Minerals_Database_sort2 = Minerals_Database_sort.sort_values(by='Name')

display(Minerals_Database_sort2)


Unnamed: 0,Name,MgO_Percent,CaO_Percent,FeO_Percent,CO2
873,Antigorite-T,42.91981,0.0,0.0,46.868432
66,Antigorite,44.59062,0.0,0.0,48.692957
902,Arrojadite-(KNa),18.877308,2.032941,36.592935,53.643209
1603,Gottardiite,14.519609,20.327453,23.957356,52.391833
1867,Khmaralite,12.554887,0.0,47.708572,54.6916
