Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

training fails on VPN dataset with a ValueError #42

Open
gsankara opened this issue Aug 3, 2023 · 1 comment
Open

training fails on VPN dataset with a ValueError #42

gsankara opened this issue Aug 3, 2023 · 1 comment

Comments

@gsankara
Copy link

gsankara commented Aug 3, 2023

I see a `ValueError: Please pass features or at least one example when writing data` at the end of train_cnn when run on the VPN dataset. I have not modified the code. I faced a NaN error and set under-sampling to False. Then I encountered this one.

Here is the detailed output
`Trainer(val_check_interval=1.0)` was configured so validation will run at the end of the training epoch.

| Name | Type | Params

0 | conv1 | Sequential | 1.0 K
1 | conv2 | Sequential | 200 K
2 | max_pool | MaxPool1d | 0
3 | fc1 | Sequential | 9.9 M
4 | fc2 | Sequential | 20.1 K
5 | fc3 | Sequential | 5.0 K
6 | out | Linear | 867

10.1 M Trainable params
0 Non-trainable params
10.1 M Total params
40.430 Total estimated model params size (MB)
Using custom data configuration train.parquet-2c3be5e9d214c057
Downloading and preparing dataset parquet/train.parquet to /home/rvn/.cache/huggingface/datasets/parquet/train.parquet-2c3be5e9d214c057/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec...
Downloading data files: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 3663.15it/s]
Extracting data files: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 565.27it/s]
Traceback (most recent call last):
File "/home/rvn/Deep-Packet/train_cnn.py", line 33, in
main()
File "/home/rvn/miniconda3/envs/deep_packet/lib/python3.10/site-packages/click/core.py", line 1130, in call
return self.main(*args, **kwargs)
File "/home/rvn/miniconda3/envs/deep_packet/lib/python3.10/site-packages/click/core.py", line 1055, in main
rv = self.invoke(ctx)
File "/home/rvn/miniconda3/envs/deep_packet/lib/python3.10/site-packages/click/core.py", line 1404, in invoke
return ctx.invoke(self.callback, **ctx.params)
File "/home/rvn/miniconda3/envs/deep_packet/lib/python3.10/site-packages/click/core.py", line 760, in invoke
return __callback(*args, **kwargs)
File "/home/rvn/Deep-Packet/train_cnn.py", line 25, in main
train_application_classification_cnn_model(data_path, model_path)
File "/home/rvn/Deep-Packet/ml/utils.py", line 117, in train_application_classification_cnn_model
train_cnn(
File "/home/rvn/Deep-Packet/ml/utils.py", line 58, in train_cnn
trainer.fit(model)
File "/home/rvn/miniconda3/envs/deep_packet/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py", line 696, in fit
self._call_and_handle_interrupt(
File "/home/rvn/miniconda3/envs/deep_packet/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py", line 650, in _call_and_handle_interrupt
return trainer_fn(*args, **kwargs)
File "/home/rvn/miniconda3/envs/deep_packet/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py", line 735, in _fit_impl
results = self._run(model, ckpt_path=self.ckpt_path)
File "/home/rvn/miniconda3/envs/deep_packet/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py", line 1166, in _run
results = self._run_stage()
File "/home/rvn/miniconda3/envs/deep_packet/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py", line 1252, in _run_stage
return self._run_train()
File "/home/rvn/miniconda3/envs/deep_packet/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py", line 1283, in _run_train
self.fit_loop.run()
File "/home/rvn/miniconda3/envs/deep_packet/lib/python3.10/site-packages/pytorch_lightning/loops/loop.py", line 195, in run
self.on_run_start(*args, **kwargs)
File "/home/rvn/miniconda3/envs/deep_packet/lib/python3.10/site-packages/pytorch_lightning/loops/fit_loop.py", line 211, in on_run_start
self.trainer.reset_train_dataloader(self.trainer.lightning_module)
File "/home/rvn/miniconda3/envs/deep_packet/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py", line 1812, in reset_train_dataloader
self.train_dataloader = self._data_connector._request_dataloader(RunningStage.TRAINING)
File "/home/rvn/miniconda3/envs/deep_packet/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py", line 453, in _request_dataloader
dataloader = source.dataloader()
File "/home/rvn/miniconda3/envs/deep_packet/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py", line 526, in dataloader
return self.instance.trainer._call_lightning_module_hook(self.name, pl_module=self.instance)
File "/home/rvn/miniconda3/envs/deep_packet/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py", line 1550, in _call_lightning_module_hook
output = fn(*args, **kwargs)
File "/home/rvn/Deep-Packet/ml/model.py", line 101, in train_dataloader
dataset_dict = datasets.load_dataset(self.hparams.data_path)
File "/home/rvn/miniconda3/envs/deep_packet/lib/python3.10/site-packages/datasets/load.py", line 1698, in load_dataset
builder_instance.download_and_prepare(
File "/home/rvn/miniconda3/envs/deep_packet/lib/python3.10/site-packages/datasets/builder.py", line 807, in download_and_prepare
self._download_and_prepare(
File "/home/rvn/miniconda3/envs/deep_packet/lib/python3.10/site-packages/datasets/builder.py", line 898, in _download_and_prepare
self._prepare_split(split_generator, **prepare_split_kwargs)
File "/home/rvn/miniconda3/envs/deep_packet/lib/python3.10/site-packages/datasets/builder.py", line 1516, in _prepare_split
num_examples, num_bytes = writer.finalize()
File "/home/rvn/miniconda3/envs/deep_packet/lib/python3.10/site-packages/datasets/arrow_writer.py", line 559, in finalize
raise ValueError("Please pass features or at least one example when writing data")
ValueError: Please pass features or at least one example when writing data`

@365ms
Copy link

365ms commented Aug 17, 2023

I faced NaN error and set under sampling to False.
You don't need to set it to False. Under-sampling is quite an important process.
Try to check the processed_data: open one of the .json files and inspect the app_label and traffic_label fields. If the value is null, it means you should add the specific prefix-to-ID mapping for your dataset in utils.py.
# Mapping from capture-file name prefix to application class ID.
# Each group of prefixes (plain and vpn_ variants) shares one label.
PREFIX_TO_APP_ID = {
    # 0: AIM chat
    "aim_chat_3a": 0,
    "aim_chat_3b": 0,
    "aimchat1": 0,
    "aimchat2": 0,
    "vpn_aim_chat1a": 0,
    "vpn_aim_chat1b": 0,
    # 1: Email
    "email1a": 1,
    "email1b": 1,
    "email2a": 1,
    "email2b": 1,
    "vpn_email2a": 1,
    "vpn_email2b": 1,
    # 2: Facebook
    "facebook_audio1a": 2,
    "facebook_audio1b": 2,
    "facebook_audio2a": 2,
    "facebook_audio2b": 2,
    "facebook_audio3": 2,
    "facebook_audio4": 2,
    "facebook_chat_4a": 2,
    "facebook_chat_4b": 2,
    "facebook_video1a": 2,
    "facebook_video1b": 2,
    "facebook_video2a": 2,
    "facebook_video2b": 2,
    "facebookchat1": 2,
    "facebookchat2": 2,
    "facebookchat3": 2,
    "vpn_facebook_audio2": 2,
    "vpn_facebook_chat1a": 2,
    "vpn_facebook_chat1b": 2,
    # 3: FTPS
    "ftps_down_1a": 3,
    "ftps_down_1b": 3,
    "ftps_up_2a": 3,
    "ftps_up_2b": 3,
    "vpn_ftps_A": 3,
    "vpn_ftps_B": 3,
    # 4: Gmail
    "gmailchat1": 4,
    "gmailchat2": 4,
    "gmailchat3": 4,
    # 5: Hangouts
    "hangout_chat_4b": 5,
    "hangouts_audio1a": 5,
    "hangouts_audio1b": 5,
    "hangouts_audio2a": 5,
    "hangouts_audio2b": 5,
    "hangouts_audio3": 5,
    "hangouts_audio4": 5,
    "hangouts_chat_4a": 5,
    "hangouts_video1b": 5,
    "hangouts_video2a": 5,
    "hangouts_video2b": 5,
    "vpn_hangouts_audio1": 5,
    "vpn_hangouts_audio2": 5,
    "vpn_hangouts_chat1a": 5,
    "vpn_hangouts_chat1b": 5,
    # 6: ICQ
    "icq_chat_3a": 6,
    "icq_chat_3b": 6,
    "icqchat1": 6,
    "icqchat2": 6,
    # 7: Netflix
    "netflix1": 7,
    "netflix2": 7,
    "netflix3": 7,
    "netflix4": 7,
    # 8: SCP
    "scp1": 8,
    "scpdown1": 8,
    "scpdown2": 8,
    "scpdown3": 8,
    "scpdown4": 8,
    "scpdown5": 8,
    "scpdown6": 8,
    "scpup1": 8,
    "scpup2": 8,
    "scpup3": 8,
    "scpup5": 8,
    "scpup6": 8,
    # 9: SFTP
    "sftp1": 9,
    "sftp_down_3a": 9,
    "sftp_down_3b": 9,
    "sftp_up_2a": 9,
    "sftp_up_2b": 9,
    "sftpdown1": 9,
    "sftpdown2": 9,
    "sftpup1": 9,
    # 10: Skype
    "skype_audio1a": 10,
    "skype_audio1b": 10,
    "skype_audio2a": 10,
    "skype_audio2b": 10,
    "skype_audio3": 10,
    "skype_audio4": 10,
    "skype_chat1a": 10,
    "skype_chat1b": 10,
    "skype_file1": 10,
    "skype_file2": 10,
    "skype_file3": 10,
    "skype_file4": 10,
    "skype_file5": 10,
    "skype_file6": 10,
    "skype_file7": 10,
    "skype_file8": 10,
    "skype_video1a": 10,
    "skype_video1b": 10,
    "skype_video2a": 10,
    "skype_video2b": 10,
    # 11: Spotify
    "spotify1": 11,
    "spotify2": 11,
    "spotify3": 11,
    "spotify4": 11,
    # 12: Vimeo
    "vimeo1": 12,
    "vimeo2": 12,
    "vimeo3": 12,
    "vimeo4": 12,
    # 13: Voipbuster
    "voipbuster1b": 13,
    "voipbuster2b": 13,
    "voipbuster3b": 13,
    "voipbuster_4a": 13,
    "voipbuster_4b": 13,
    # 14: YouTube
    "youtube1": 14,
    "youtube2": 14,
    "youtube3": 14,
    "youtube4": 14,
    "youtube5": 14,
    "youtube6": 14,
    "youtubehtml5_1": 14,
    # 15: BitTorrent
    "vpn_bittorrent": 15,
}
The dataset I use is VPN-PCAPS-01.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants