In [1]:
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import StandardScaler
from azureml.core import Dataset
from azure.identity import DefaultAzureCredential
from azure.ai.ml import MLClient
import os
from azureml.core import Workspace, Datastore, Run, Model, Experiment
import gc
import pickle

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Initialize the ML client
ml_client = MLClient.from_config(credential=DefaultAzureCredential())

#WS Init -> for persistance
ws = Workspace.from_config()  # Assumes there is a config.json file in the current directory
datastore = ws.get_default_datastore()

Found the config file in: /config.json


In [7]:

# Load a small sample to determine the number of columns
data_assetTest = ml_client.data.get("MH100k_1", version="1")
sample_data_test = pd.read_csv(data_assetTest.path, nrows=5)  # Load the first 5 rows
#experiment = Experiment(workspace=ws, name='malware-detection-experiment3')
#run = experiment.start_logging()
sample_data_test

Unnamed: 0,SHA256,NOME,PACOTE,API_MIN,API,Permission::WAKE_LOCK,Permission::WRITE_EXTERNAL_STORAGE,Permission::ACCESS_NETWORK_STATE,Permission::WRITE_SETTINGS,Permission::INTERNET,...,APICall::Landroid/widget/ListView.setScaleX(),APICall::Landroid/widget/ListView.getScaleX(),APICall::Landroid/widget/ScrollView.setOnApplyWindowInsetsListener(),APICall::Landroid/widget/ExpandableListView.setNextFocusUpId(),APICall::Landroid/widget/ScrollView.setSoundEffectsEnabled(),APICall::Landroid/widget/TableRow.getLeft(),APICall::Landroid/widget/HorizontalScrollView.onKeyUp(),APICall::Landroid/widget/RatingBar.isFocusable(),APICall::Landroid/preference/ListPreference.getView(),APICall::Landroid/widget/LinearLayout.computeHorizontalScrollExtent()
0,080da3f89e42250d7462e17b40535cfca9b1a6a8370a31...,2019 شاب دوزي‎,com.arabprod.aghani.douzi,10,26,1,1,1,1,1,...,0,0,0,0,0,0,0,0,0,0
1,461760796dd7789673cfaf68383da103033d54eb4a5267...,Ishas,appinventor.ai_shameertanur.Ishas,7,28,0,0,1,0,1,...,0,0,0,0,0,0,0,0,0,0
2,dab8b14c3178b15200b23e47cecb9cc26b51c87d599ac0...,Lashes&Go,net.flowww.z.sk596381,16,26,1,1,1,0,1,...,0,0,0,0,0,0,0,0,0,0
3,db802025f9ec474d79793ac2aac556d2b52162ebc493e2...,58到家,com.wuba.jiazheng,17,25,1,1,1,1,1,...,0,0,0,0,0,0,0,0,0,0
4,a44920abdd4915117412ad7695b8d95a1da5edfa513b09...,TEDDY AFRO,million.t.com.teddy.afro.com,16,27,0,1,1,0,1,...,0,0,0,0,0,0,0,0,0,0


In [16]:
run.complete()

In [17]:
run

Experiment,Id,Type,Status,Details Page,Docs Page
malware-detection-experiment3,0f19fdec-6393-40e9-80ad-332a1a256d7b,,Running,Link to Azure Machine Learning studio,Link to Documentation


In [38]:
data_asset = ml_client.data.get("MH100k_2", version="1")


In [18]:


# Load a small sample to determine input size
sample_data = pd.read_csv(data_asset.path, nrows=10)  # Load the first 10 rows
non_numerical_columns = sample_data.select_dtypes(exclude=[int, float]).columns.tolist()
sample_data=sample_data.drop(columns=non_numerical_columns)

input_size = sample_data.shape[1] - 1  # Subtract 1 for the 'CLASS' column

display(sample_data)

Unnamed: 0,API_MIN,API,Permission::WAKE_LOCK,Permission::WRITE_EXTERNAL_STORAGE,Permission::ACCESS_NETWORK_STATE,Permission::WRITE_SETTINGS,Permission::INTERNET,Intent::AUDIO_BECOMING_NOISY,APICall::Landroid/content/Intent.toUri(),APICall::Landroid/view/View.setTag(),...,APICall::Landroid/widget/ListView.setScaleX(),APICall::Landroid/widget/ListView.getScaleX(),APICall::Landroid/widget/ScrollView.setOnApplyWindowInsetsListener(),APICall::Landroid/widget/ExpandableListView.setNextFocusUpId(),APICall::Landroid/widget/ScrollView.setSoundEffectsEnabled(),APICall::Landroid/widget/TableRow.getLeft(),APICall::Landroid/widget/HorizontalScrollView.onKeyUp(),APICall::Landroid/widget/RatingBar.isFocusable(),APICall::Landroid/preference/ListPreference.getView(),APICall::Landroid/widget/LinearLayout.computeHorizontalScrollExtent()
0,10,26,1,1,1,1,1,1,1,1,...,0,0,0,0,0,0,0,0,0,0
1,7,28,0,0,1,0,1,0,0,1,...,0,0,0,0,0,0,0,0,0,0
2,16,26,1,1,1,0,1,0,1,1,...,0,0,0,0,0,0,0,0,0,0
3,17,25,1,1,1,1,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,16,27,0,1,1,0,1,0,1,1,...,0,0,0,0,0,0,0,0,0,0
5,21,29,1,1,1,0,1,0,0,1,...,0,0,0,0,0,0,0,0,0,0
6,21,29,1,1,1,1,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7,15,23,1,0,1,0,1,0,1,1,...,0,0,0,0,0,0,0,0,0,0
8,16,21,0,1,1,0,1,0,0,1,...,0,0,0,0,0,0,0,0,0,0
9,21,29,0,0,1,0,1,0,1,1,...,0,0,0,0,0,0,0,0,0,0


In [19]:
class MalwareDetectionNN(nn.Module):
    def __init__(self, input_size):
        super(MalwareDetectionNN, self).__init__()
        self.fc1 = nn.Linear(input_size, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 2)  # 2 output classes: benign or malicious

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [20]:
def evaluate_model(model, test_loader, criterion):
    model.eval()
    test_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            test_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = correct / total
    avg_loss = test_loss / len(test_loader)
    return avg_loss, accuracy

In [41]:
def train_model_in_chunks(data_asset, chunk_size, model, criterion, optimizer,non_numerical_columns, epochs=1):
    
    chunks = pd.read_csv(data_asset.path, chunksize=chunk_size,nrows=100000)
    
  

    for chunk_idx, chunk in enumerate(chunks):
        print(f"Processing chunk {chunk_idx+1}")
        
        # Train-test split
        train_split = int(0.8 * len(chunk))
        display(non_numerical_columns)
        chunk2=chunk.drop(columns=non_numerical_columns)
        x_train = chunk2[:train_split]
        x_test = chunk2[train_split:]
        #x_train = x_train.drop(columns=non_numerical_columns)
        #x_test = x_test.drop(columns=non_numerical_columns)

        y_train = x_train['CLASS']
        y_test = x_test['CLASS']
        #all_test.append(x_test)
        #all_test = pd.concat([all_test, x_test], ignore_index=True)

        x_train = x_train.drop(columns='CLASS')
        x_test = x_test.drop(columns='CLASS')


        # Standardize the features
        scaler = StandardScaler()
        x_train = scaler.fit_transform(x_train)
        x_test = scaler.transform(x_test)

        # Save the scaler
        with open('scaler.pkl', 'wb') as f:
            pickle.dump(scaler, f)
            
        # Convert to PyTorch tensors
        X_train_tensor = torch.tensor(x_train, dtype=torch.float32)
        y_train_tensor = torch.tensor(y_train.values, dtype=torch.long)
        X_test_tensor = torch.tensor(x_test, dtype=torch.float32)
        y_test_tensor = torch.tensor(y_test.values, dtype=torch.long)

        # Create DataLoader
        train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
        test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
        train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
        test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

        # Training loop
        for epoch in range(epochs):
            model.train()
            running_loss = 0.0
            for inputs, labels in train_loader:
                optimizer.zero_grad()
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()
                running_loss += loss.item()

            print(f'Chunk {chunk_idx+1}, Epoch {epoch+1}, Loss: {running_loss/len(train_loader)}',flush=True)

        # Evaluate on test set after each chunk
        test_loss, test_accuracy = evaluate_model(model, test_loader, criterion)
        print(f'Chunk {chunk_idx+1}, Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}',flush=True)

        # Save model state after each chunk
        model_save_path = f'model_chunk_2_{chunk_idx+1}.pth'
        torch.save(model.state_dict(), model_save_path)
        print(f"Saved model state to {model_save_path}",flush=True)

        # Upload the file to the default datastore
        #run = Run.get_context()  # Get the current run context
        run.upload_file(name=f"model_outputs/{model_save_path}", path_or_stream=model_save_path)
        print(f"Uploaded model state to the default datastore at model_outputs/{model_save_path}")



        # Explicitly delete the chunk and call garbage collection
        del chunk, x_train, x_test, y_train, y_test
        del X_train_tensor, y_train_tensor, X_test_tensor, y_test_tensor
        del train_dataset, test_dataset, train_loader, test_loader
        gc.collect()

        # Optionally: Load model state before processing the next chunk (if needed)
        # model.load_state_dict(torch.load(model_save_path))


NameError: name 'chunk_idx' is not defined

Evaluate fn

In [22]:
run.experiment.workspace

Workspace.create(name='stefan', subscription_id='439ab49f-8192-408d-b924-846fbe36dfc7', resource_group='stefanshkenderov')

In [23]:
model_path

NameError: name 'model_path' is not defined

In [None]:
model_path

In [25]:
# Initialize model, loss function, and optimizer
model = MalwareDetectionNN(input_size)

#model = Model(ws, name='MH100K_PartLast_model', version=1)
#model_path = model.download(exist_ok=True)

# Load your model class definition
loaded_model = MalwareDetectionNN(input_size)  # Replace with your model class
#loaded_model.load_state_dict(torch.load(model_path))




In [26]:
# Set the chunk size
chunk_size = 5000 #incr and experiment


criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(loaded_model.parameters(), lr=0.001)

In [None]:
criterion

In [42]:
train_model_in_chunks(data_asset, chunk_size, loaded_model, criterion, optimizer,non_numerical_columns,10)

Processing chunk 1


['SHA256', 'NOME', 'PACOTE']

Chunk 1, Epoch 1, Loss: 0.025994155964115636
Chunk 1, Epoch 2, Loss: 0.032978072073077784
Chunk 1, Epoch 3, Loss: 0.023235523393261247
Chunk 1, Epoch 4, Loss: 0.029372438060527202
Chunk 1, Epoch 5, Loss: 0.030035553243011236
Chunk 1, Epoch 6, Loss: 0.0262973186573945
Chunk 1, Epoch 7, Loss: 0.016291915625333785
Chunk 1, Epoch 8, Loss: 0.019696548927313415
Chunk 1, Epoch 9, Loss: 0.016265867715672358
Chunk 1, Epoch 10, Loss: 0.018911026933186803
Chunk 1, Test Loss: 0.0241, Test Accuracy: 0.9920
Saved model state to model_chunk_2_1.pth
Uploaded model state to the default datastore at model_outputs/model_chunk_2_1.pth
Processing chunk 2


['SHA256', 'NOME', 'PACOTE']

Chunk 2, Epoch 1, Loss: 0.020530974418152225
Chunk 2, Epoch 2, Loss: 0.0018197962351504575
Chunk 2, Epoch 3, Loss: 0.0012181349907374184
Chunk 2, Epoch 4, Loss: 0.0011798542228274797
Chunk 2, Epoch 5, Loss: 0.001163746448863579
Chunk 2, Epoch 6, Loss: 0.0011505964817834168
Chunk 2, Epoch 7, Loss: 0.0011418970125634188
Chunk 2, Epoch 8, Loss: 0.0011237351611893
Chunk 2, Epoch 9, Loss: 0.0005685558645931295
Chunk 2, Epoch 10, Loss: 0.0005658922440028213
Chunk 2, Test Loss: 0.0023, Test Accuracy: 0.9990
Saved model state to model_chunk_2_2.pth
Uploaded model state to the default datastore at model_outputs/model_chunk_2_2.pth
Processing chunk 3


['SHA256', 'NOME', 'PACOTE']

Chunk 3, Epoch 1, Loss: 0.0009317566948766682
Chunk 3, Epoch 2, Loss: 0.0005557449674110621
Chunk 3, Epoch 3, Loss: 0.0005540076461321935
Chunk 3, Epoch 4, Loss: 0.0005523894463601362
Chunk 3, Epoch 5, Loss: 0.0005507573941940863
Chunk 3, Epoch 6, Loss: 0.0005491272110099957
Chunk 3, Epoch 7, Loss: 0.00054745181948382
Chunk 3, Epoch 8, Loss: 0.0005457598819257025
Chunk 3, Epoch 9, Loss: 0.0005440029204239547
Chunk 3, Epoch 10, Loss: 0.0005421434229124227
Chunk 3, Test Loss: 3.8066, Test Accuracy: 0.8020
Saved model state to model_chunk_2_3.pth
Uploaded model state to the default datastore at model_outputs/model_chunk_2_3.pth
Processing chunk 4


['SHA256', 'NOME', 'PACOTE']

Chunk 4, Epoch 1, Loss: 0.8793855150938034
Chunk 4, Epoch 2, Loss: 0.245396602332592
Chunk 4, Epoch 3, Loss: 0.1906348138153553
Chunk 4, Epoch 4, Loss: 0.1625749854147434
Chunk 4, Epoch 5, Loss: 0.14705582830309868
Chunk 4, Epoch 6, Loss: 0.14184008480608462
Chunk 4, Epoch 7, Loss: 0.13580448581278323
Chunk 4, Epoch 8, Loss: 0.12663543316721917
Chunk 4, Epoch 9, Loss: 0.11905142571032047
Chunk 4, Epoch 10, Loss: 0.11344494067877531
Chunk 4, Test Loss: 0.2038, Test Accuracy: 0.9160
Saved model state to model_chunk_2_4.pth
Uploaded model state to the default datastore at model_outputs/model_chunk_2_4.pth
Processing chunk 5


['SHA256', 'NOME', 'PACOTE']

Chunk 5, Epoch 1, Loss: 0.09096655834533027
Chunk 5, Epoch 2, Loss: 0.00305239146799817
Chunk 5, Epoch 3, Loss: 0.0018892065569336937
Chunk 5, Epoch 4, Loss: 0.0018713045791447112
Chunk 5, Epoch 5, Loss: 0.0018593452932023186
Chunk 5, Epoch 6, Loss: 0.0018451429184335754
Chunk 5, Epoch 7, Loss: 0.0008382941348412451
Chunk 5, Epoch 8, Loss: 8.117504760832617e-06
Chunk 5, Epoch 9, Loss: 7.5970095001469674e-06
Chunk 5, Epoch 10, Loss: 7.119340259476914e-06
Chunk 5, Test Loss: 0.0007, Test Accuracy: 1.0000
Saved model state to model_chunk_2_5.pth
Uploaded model state to the default datastore at model_outputs/model_chunk_2_5.pth
Processing chunk 6


['SHA256', 'NOME', 'PACOTE']

Chunk 6, Epoch 1, Loss: 0.00036861422226211894
Chunk 6, Epoch 2, Loss: 1.4376930815938493e-05
Chunk 6, Epoch 3, Loss: 1.0960766569187542e-05
Chunk 6, Epoch 4, Loss: 9.307799605345225e-06
Chunk 6, Epoch 5, Loss: 8.13554639216818e-06
Chunk 6, Epoch 6, Loss: 7.1894198174558e-06
Chunk 6, Epoch 7, Loss: 6.419186943256605e-06
Chunk 6, Epoch 8, Loss: 5.756506682560669e-06
Chunk 6, Epoch 9, Loss: 5.121298478997005e-06
Chunk 6, Epoch 10, Loss: 4.642933508655744e-06
Chunk 6, Test Loss: 0.0010, Test Accuracy: 0.9990
Saved model state to model_chunk_2_6.pth
Uploaded model state to the default datastore at model_outputs/model_chunk_2_6.pth
Processing chunk 7


['SHA256', 'NOME', 'PACOTE']

Chunk 7, Epoch 1, Loss: 0.00014402797852936723
Chunk 7, Epoch 2, Loss: 1.3913820797849752e-05
Chunk 7, Epoch 3, Loss: 9.944561493005821e-06
Chunk 7, Epoch 4, Loss: 7.93648187233309e-06
Chunk 7, Epoch 5, Loss: 6.448678672267505e-06
Chunk 7, Epoch 6, Loss: 5.45766984612861e-06
Chunk 7, Epoch 7, Loss: 4.696023547039019e-06
Chunk 7, Epoch 8, Loss: 4.0959290170476945e-06
Chunk 7, Epoch 9, Loss: 3.6133750262088426e-06
Chunk 7, Epoch 10, Loss: 3.1971509515997808e-06
Chunk 7, Test Loss: 0.0000, Test Accuracy: 1.0000
Saved model state to model_chunk_2_7.pth
Uploaded model state to the default datastore at model_outputs/model_chunk_2_7.pth
Processing chunk 8


['SHA256', 'NOME', 'PACOTE']

Chunk 8, Epoch 1, Loss: 4.3070044565032714e-05
Chunk 8, Epoch 2, Loss: 2.2040797059830196e-06
Chunk 8, Epoch 3, Loss: 1.8098672879816037e-06
Chunk 8, Epoch 4, Loss: 1.5175097354855893e-06
Chunk 8, Epoch 5, Loss: 1.3154871696556824e-06
Chunk 8, Epoch 6, Loss: 1.163200826592714e-06
Chunk 8, Epoch 7, Loss: 1.0421279696029018e-06
Chunk 8, Epoch 8, Loss: 9.421425115849758e-07
Chunk 8, Epoch 9, Loss: 8.55889467956672e-07
Chunk 8, Epoch 10, Loss: 7.817590303051958e-07
Chunk 8, Test Loss: 0.0000, Test Accuracy: 1.0000
Saved model state to model_chunk_2_8.pth
Uploaded model state to the default datastore at model_outputs/model_chunk_2_8.pth
Processing chunk 9


['SHA256', 'NOME', 'PACOTE']

Chunk 9, Epoch 1, Loss: 0.00011916580631672602
Chunk 9, Epoch 2, Loss: 2.3811251843852688e-07
Chunk 9, Epoch 3, Loss: 2.1939772746115693e-07
Chunk 9, Epoch 4, Loss: 2.0554046751719567e-07
Chunk 9, Epoch 5, Loss: 1.9353081874307065e-07
Chunk 9, Epoch 6, Loss: 1.832197568898408e-07
Chunk 9, Epoch 7, Loss: 1.7377291156250862e-07
Chunk 9, Epoch 8, Loss: 1.6513064175960323e-07
Chunk 9, Epoch 9, Loss: 1.5738239505402874e-07
Chunk 9, Epoch 10, Loss: 1.5005133890610977e-07
Chunk 9, Test Loss: 0.0000, Test Accuracy: 1.0000
Saved model state to model_chunk_2_9.pth
Uploaded model state to the default datastore at model_outputs/model_chunk_2_9.pth
Processing chunk 10


['SHA256', 'NOME', 'PACOTE']

Chunk 10, Epoch 1, Loss: 4.1932503390054165e-06
Chunk 10, Epoch 2, Loss: 5.583515934937111e-07
Chunk 10, Epoch 3, Loss: 4.564176995174307e-07
Chunk 10, Epoch 4, Loss: 3.8783571078049307e-07
Chunk 10, Epoch 5, Loss: 3.3828698972371287e-07
Chunk 10, Epoch 6, Loss: 2.9940277692830365e-07
Chunk 10, Epoch 7, Loss: 2.6793670478397755e-07
Chunk 10, Epoch 8, Loss: 2.416846131918504e-07
Chunk 10, Epoch 9, Loss: 2.2028881383384658e-07
Chunk 10, Epoch 10, Loss: 2.0005529197675286e-07
Chunk 10, Test Loss: 0.0001, Test Accuracy: 1.0000
Saved model state to model_chunk_2_10.pth
Uploaded model state to the default datastore at model_outputs/model_chunk_2_10.pth
Processing chunk 11


['SHA256', 'NOME', 'PACOTE']

Chunk 11, Epoch 1, Loss: 3.1357533286815454e-06
Chunk 11, Epoch 2, Loss: 3.625123951156439e-07
Chunk 11, Epoch 3, Loss: 2.98865787412339e-07
Chunk 11, Epoch 4, Loss: 2.5601603763192317e-07
Chunk 11, Epoch 5, Loss: 2.2514437013221312e-07
Chunk 11, Epoch 6, Loss: 1.9617948303363163e-07
Chunk 11, Epoch 7, Loss: 1.7460451503303887e-07
Chunk 11, Epoch 8, Loss: 1.5678413819131264e-07
Chunk 11, Epoch 9, Loss: 1.4146681918880688e-07
Chunk 11, Epoch 10, Loss: 1.2859306296064688e-07
Chunk 11, Test Loss: 0.0000, Test Accuracy: 1.0000
Saved model state to model_chunk_2_11.pth
Uploaded model state to the default datastore at model_outputs/model_chunk_2_11.pth
Processing chunk 12


['SHA256', 'NOME', 'PACOTE']

Chunk 12, Epoch 1, Loss: 4.090722522837709e-06
Chunk 12, Epoch 2, Loss: 1.0570570529644386e-07
Chunk 12, Epoch 3, Loss: 8.997112924369332e-08
Chunk 12, Epoch 4, Loss: 8.022624688308611e-08
Chunk 12, Epoch 5, Loss: 7.256737963423632e-08
Chunk 12, Epoch 6, Loss: 6.610051059396937e-08
Chunk 12, Epoch 7, Loss: 6.052766272368615e-08
Chunk 12, Epoch 8, Loss: 5.569982577569022e-08
Chunk 12, Epoch 9, Loss: 5.119979392098628e-08
Chunk 12, Epoch 10, Loss: 4.744479639562371e-08
Chunk 12, Test Loss: 0.0000, Test Accuracy: 1.0000
Saved model state to model_chunk_2_12.pth
Uploaded model state to the default datastore at model_outputs/model_chunk_2_12.pth
Processing chunk 13


['SHA256', 'NOME', 'PACOTE']

Chunk 13, Epoch 1, Loss: 6.542992495486288e-07
Chunk 13, Epoch 2, Loss: 2.8103414155822292e-08
Chunk 13, Epoch 3, Loss: 2.0652922726682732e-08
Chunk 13, Epoch 4, Loss: 1.7523709313849168e-08
Chunk 13, Epoch 5, Loss: 1.5407757807395227e-08
Chunk 13, Epoch 6, Loss: 1.3828244469493711e-08
Chunk 13, Epoch 7, Loss: 1.2487146452855314e-08
Chunk 13, Epoch 8, Loss: 1.1414267641285391e-08
Chunk 13, Epoch 9, Loss: 1.0460596953976164e-08
Chunk 13, Epoch 10, Loss: 9.685739547293793e-09
Chunk 13, Test Loss: 0.0000, Test Accuracy: 1.0000
Saved model state to model_chunk_2_13.pth
Uploaded model state to the default datastore at model_outputs/model_chunk_2_13.pth
Processing chunk 14


['SHA256', 'NOME', 'PACOTE']

Chunk 14, Epoch 1, Loss: 1.1154100626953323e-07
Chunk 14, Epoch 2, Loss: 2.926539618464119e-08
Chunk 14, Epoch 3, Loss: 2.253031435195396e-08
Chunk 14, Epoch 4, Loss: 1.8596503236878446e-08
Chunk 14, Epoch 5, Loss: 1.594414525918353e-08
Chunk 14, Epoch 6, Loss: 1.4036823863250447e-08
Chunk 14, Epoch 7, Loss: 1.2367910988686504e-08
Chunk 14, Epoch 8, Loss: 1.1056621710636705e-08
Chunk 14, Epoch 9, Loss: 1.0073153726253281e-08
Chunk 14, Epoch 10, Loss: 9.149288079157714e-09
Chunk 14, Test Loss: 0.0000, Test Accuracy: 1.0000
Saved model state to model_chunk_2_14.pth
Uploaded model state to the default datastore at model_outputs/model_chunk_2_14.pth
Processing chunk 15


['SHA256', 'NOME', 'PACOTE']

Chunk 15, Epoch 1, Loss: 0.7815053734183312
Chunk 15, Epoch 2, Loss: 0.3067527239322662
Chunk 15, Epoch 3, Loss: 0.26665974551439287
Chunk 15, Epoch 4, Loss: 0.24849051076173784
Chunk 15, Epoch 5, Loss: 0.23793535518646242
Chunk 15, Epoch 6, Loss: 0.23223498374223708
Chunk 15, Epoch 7, Loss: 0.21351465731859207
Chunk 15, Epoch 8, Loss: 0.2051488822698593
Chunk 15, Epoch 9, Loss: 0.21054000821709634
Chunk 15, Epoch 10, Loss: 0.1968933511376381
Chunk 15, Test Loss: 0.5454, Test Accuracy: 0.8980
Saved model state to model_chunk_2_15.pth
Uploaded model state to the default datastore at model_outputs/model_chunk_2_15.pth
Processing chunk 16


['SHA256', 'NOME', 'PACOTE']

Chunk 16, Epoch 1, Loss: 0.2851715098619461
Chunk 16, Epoch 2, Loss: 0.18312417221069335
Chunk 16, Epoch 3, Loss: 0.15347559052705764
Chunk 16, Epoch 4, Loss: 0.13075471675395967
Chunk 16, Epoch 5, Loss: 0.10520477679744362
Chunk 16, Epoch 6, Loss: 0.09072236268222332
Chunk 16, Epoch 7, Loss: 0.0834415926039219
Chunk 16, Epoch 8, Loss: 0.07703522159717976
Chunk 16, Epoch 9, Loss: 0.07878787621110678
Chunk 16, Epoch 10, Loss: 0.07571898213215172
Chunk 16, Test Loss: 0.3154, Test Accuracy: 0.9430
Saved model state to model_chunk_2_16.pth
Uploaded model state to the default datastore at model_outputs/model_chunk_2_16.pth
Processing chunk 17


['SHA256', 'NOME', 'PACOTE']

Chunk 17, Epoch 1, Loss: 0.2550895735323429
Chunk 17, Epoch 2, Loss: 0.14362054784595965
Chunk 17, Epoch 3, Loss: 0.11076737549901008
Chunk 17, Epoch 4, Loss: 0.09918920667469501
Chunk 17, Epoch 5, Loss: 0.0983770554214716
Chunk 17, Epoch 6, Loss: 0.0736574835702777
Chunk 17, Epoch 7, Loss: 0.07743475981801748
Chunk 17, Epoch 8, Loss: 0.06443282458931207
Chunk 17, Epoch 9, Loss: 0.07299622130580247
Chunk 17, Epoch 10, Loss: 0.05779522995278239
Chunk 17, Test Loss: 0.2956, Test Accuracy: 0.9370
Saved model state to model_chunk_2_17.pth
Uploaded model state to the default datastore at model_outputs/model_chunk_2_17.pth
Processing chunk 18


['SHA256', 'NOME', 'PACOTE']

Chunk 18, Epoch 1, Loss: 0.23456763884425164
Chunk 18, Epoch 2, Loss: 0.12277112905681133
Chunk 18, Epoch 3, Loss: 0.09228650248795747
Chunk 18, Epoch 4, Loss: 0.08549544218927622
Chunk 18, Epoch 5, Loss: 0.06599832506664097
Chunk 18, Epoch 6, Loss: 0.07393617352843285
Chunk 18, Epoch 7, Loss: 0.0770658339355141
Chunk 18, Epoch 8, Loss: 0.06483590650185943
Chunk 18, Epoch 9, Loss: 0.054882432612590494
Chunk 18, Epoch 10, Loss: 0.0956468245111173
Chunk 18, Test Loss: 0.3268, Test Accuracy: 0.9420
Saved model state to model_chunk_2_18.pth
Uploaded model state to the default datastore at model_outputs/model_chunk_2_18.pth
Processing chunk 19


['SHA256', 'NOME', 'PACOTE']

Chunk 19, Epoch 1, Loss: 0.18102875897660853
Chunk 19, Epoch 2, Loss: 0.09720077929645776
Chunk 19, Epoch 3, Loss: 0.08192997851222754
Chunk 19, Epoch 4, Loss: 0.06579663744196296
Chunk 19, Epoch 5, Loss: 0.05743206928763538
Chunk 19, Epoch 6, Loss: 0.056573639383539556
Chunk 19, Epoch 7, Loss: 0.04324289306253195
Chunk 19, Epoch 8, Loss: 0.05129910661606118
Chunk 19, Epoch 9, Loss: 0.055747964109759775
Chunk 19, Epoch 10, Loss: 0.045064351712353526
Chunk 19, Test Loss: 0.4786, Test Accuracy: 0.9240
Saved model state to model_chunk_2_19.pth
Uploaded model state to the default datastore at model_outputs/model_chunk_2_19.pth
Processing chunk 20


['SHA256', 'NOME', 'PACOTE']

Chunk 20, Epoch 1, Loss: 0.1645863873362541
Chunk 20, Epoch 2, Loss: 0.08047849850542843
Chunk 20, Epoch 3, Loss: 0.05275656422553584
Chunk 20, Epoch 4, Loss: 0.04683008658885956
Chunk 20, Epoch 5, Loss: 0.05112621170282364
Chunk 20, Epoch 6, Loss: 0.041905271855182946
Chunk 20, Epoch 7, Loss: 0.03733301687776111
Chunk 20, Epoch 8, Loss: 0.05673723075632006
Chunk 20, Epoch 9, Loss: 0.07133313863258808
Chunk 20, Epoch 10, Loss: 0.047762916221283376
Chunk 20, Test Loss: 0.2204, Test Accuracy: 0.9470
Saved model state to model_chunk_2_20.pth
Uploaded model state to the default datastore at model_outputs/model_chunk_2_20.pth


In [35]:
model_save_path="MH100k_1_out.pth"

In [43]:
torch.save(model.state_dict(), 'MH100k_2_out.pth')

In [36]:
# Ensure the file is created
if os.path.exists(model_save_path):
    print(f"File {model_save_path} exists. Proceeding with upload.")
    # Upload the file
    run.upload_file(name=f"model_outputs/{model_save_path}", path_or_stream=model_save_path)
    print("File uploaded successfully.")
else:
    print(f"File {model_save_path} does not exist. Skipping upload.")
    #run.log("warning", f"File {model_save_path} was not found. Skipping upload.")


File MH100k_1_out.pth does not exist. Skipping upload.


model_save_path = f'mh100kFinal.pth'
torch.save(model.state_dict(), model_save_path)
print(f"Saved model state to {model_save_path}",flush=True)

# Upload the file to the default datastore
#run = Run.get_context()  # Get the current run context
run.upload_file(name=f"model_outputs/{model_save_path}", path_or_stream=model_save_path)
print(f"Uploaded model state to the default datastore at model_outputs/{model_save_path}")

In [None]:
 # Upload the file to the default datastore
model_save_path="MH100k_final_attempt2.pth"
#run = Run.get_context()  # Get the current run context
run.upload_file(name=f"model_outputs/{model_save_path}", path_or_stream=model_save_path)
print(f"Uploaded model state to the default datastore at {model_save_path}")



In [None]:
run

In [44]:
# Register the model as a new version
ws2 = run.experiment.workspace  # Get the workspace from the current run context
registered_model = Model.register(workspace=ws2,
                                    model_path=model_save_path,  # this is the local path
                                    model_name='MH100K_PartLast_model',  # this is the name the model is registered as
                                    
                                    description=f'Model trained with chunks')
print(f"Registered model as new version")

Registering model MH100K_PartLast_model
Registered model as new version


## convert to oonx and tflite


In [23]:
model = MalwareDetectionNN(input_size)

In [25]:
model.load_state_dict(torch.load('mh100kFinal.pth'))

EOFError: Ran out of input

In [45]:
dummy_input = torch.randn(1, input_size)
torch.onnx.export(model, dummy_input, "MH100k.onnx", 
                  input_names=['input'], output_names=['output'])

In [2]:
pip install onnx

Collecting onnx
  Downloading onnx-1.16.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (15.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m15.9/15.9 MB[0m [31m73.7 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting protobuf>=3.20.2
  Downloading protobuf-5.27.2-cp38-abi3-manylinux2014_x86_64.whl (309 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m309.3/309.3 kB[0m [31m34.7 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: protobuf, onnx
  Attempting uninstall: protobuf
    Found existing installation: protobuf 3.19.6
    Uninstalling protobuf-3.19.6:
      Successfully uninstalled protobuf-3.19.6
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
tensorflow 2.11.1 requires protobuf<3.20,>=3.9.2, but you have protobuf 5.27.2 which is incompatible.
tensorboard 2.11.2 requires prot

In [3]:
pip install protobuf==3.20.3

Collecting protobuf==3.20.3
  Downloading protobuf-3.20.3-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl (1.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m43.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: protobuf
  Attempting uninstall: protobuf
    Found existing installation: protobuf 5.27.2
    Uninstalling protobuf-5.27.2:
      Successfully uninstalled protobuf-5.27.2
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
tensorflow 2.11.1 requires protobuf<3.20,>=3.9.2, but you have protobuf 3.20.3 which is incompatible.
azureml-mlflow 1.51.0 requires azure-storage-blob<=12.13.0,>=12.5.0, but you have azure-storage-blob 12.20.0 which is incompatible.[0m[31m
[0mSuccessfully installed protobuf-3.20.3
Note: you may need to restart the kernel to use updated packages.


In [2]:
pip install sng4onnx

Collecting sng4onnx
  Downloading sng4onnx-1.0.4-py3-none-any.whl (5.9 kB)
Installing collected packages: sng4onnx
Successfully installed sng4onnx-1.0.4
Note: you may need to restart the kernel to use updated packages.


In [1]:
from onnx2tf import convert


In [2]:

# Path to your ONNX model
onnx_model_path = "MH100k.onnx"

# Output path for the converted TensorFlow model
output_path = "path_to_save_your_model2"

# Convert the ONNX model to TensorFlow
convert(
    input_onnx_file_path=onnx_model_path,
    output_folder_path=output_path
)



Traceback (most recent call last):
  File "/anaconda/envs/azureml_py38_PT_TF/lib/python3.8/site-packages/onnx2tf/onnx2tf.py", line 614, in convert
    result = subprocess.check_output(
  File "/anaconda/envs/azureml_py38_PT_TF/lib/python3.8/subprocess.py", line 411, in check_output
    return run(*popenargs, stdout=PIPE, timeout=timeout, check=True,
  File "/anaconda/envs/azureml_py38_PT_TF/lib/python3.8/subprocess.py", line 489, in run
    with Popen(*popenargs, **kwargs) as process:
  File "/anaconda/envs/azureml_py38_PT_TF/lib/python3.8/subprocess.py", line 854, in __init__
    self._execute_child(args, executable, preexec_fn, close_fds,
  File "/anaconda/envs/azureml_py38_PT_TF/lib/python3.8/subprocess.py", line 1702, in _execute_child
    raise child_exception_type(errno_num, err_msg, err_filename)
FileNotFoundError: [Errno 2] No such file or directory: 'onnxsim'


[32mAutomatic generation of each OP name complete![0m


[32mINFO:[0m [32minput_op_name[0m: input [32mshape[0

<keras.engine.functional.Functional at 0x7efdef8f1eb0>